refactor(cm): remove el1 context when SPMD_SPM_AT_SEL2=1

* Currently, the EL1 context is included in cpu_context_t by default
  for all build configurations.
  As part of the cpu context structure, we hold a copy of the EL1 and
  EL2 system registers, per world per PE. This context structure is
  enormous and will continue to grow with the addition of new
  features that introduce new registers.

* Ideally, EL3 should save and restore the system registers of its next
  lower exception level, which is EL2 in the majority of configurations.

* This patch aims to optimise memory allocation in cases where members
  of the context structure are unused. The EL1 system register context
  is therefore omitted when the lower EL is always x-EL2.

* "CTX_INCLUDE_EL2_REGS" is the internal build flag which gets set,
  when SPD=spmd and SPMD_SPM_AT_SEL2=1 or ENABLE_RME=1.
  It indicates that the system registers at EL2 are context switched
  for the respective build configuration. In that case there is no
  need to save and restore EL1 system registers, as x-EL2 is enabled.

Hence, this patch addresses the issue by removing the EL1 context
wherever possible when EL2 (CTX_INCLUDE_EL2_REGS) is enabled, thereby
saving memory.

Change-Id: Ifddc497d3c810e22a15b1c227a731bcc133c2f4a
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 49ebedc..87f1541 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -7,8 +7,16 @@
 #ifndef CONTEXT_H
 #define CONTEXT_H
 
-#include <lib/el3_runtime/context_el1.h>
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 #include <lib/el3_runtime/context_el2.h>
+#else
+/**
+ * The EL1 context is required when:
+ * IMAGE_BL1 || ((!CTX_INCLUDE_EL2_REGS) && IMAGE_BL31)
+ */
+#include <lib/el3_runtime/context_el1.h>
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
+
 #include <lib/el3_runtime/cpu_data.h>
 #include <lib/el3_runtime/simd_ctx.h>
 #include <lib/utils_def.h>
@@ -250,10 +258,16 @@
 	pauth_t pauth_ctx;
 #endif
 
-	el1_sysregs_t el1_sysregs_ctx;
-
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 	el2_sysregs_t el2_sysregs_ctx;
+#else
+	/* The EL1 context is included only for IMAGE_BL1, or for
+	 * IMAGE_BL31 when CTX_INCLUDE_EL2_REGS=0.
+	 * When SPMD_SPM_AT_SEL2=1, SPMC at S-EL2 takes care of saving
+	 * and restoring EL1 registers. In this case, BL31 at EL3 can
+	 * exclude save and restore of EL1 context registers.
+	 */
+	el1_sysregs_t el1_sysregs_ctx;
 #endif
 
 } cpu_context_t;
@@ -272,10 +286,13 @@
 
 /* Macros to access members of the 'cpu_context_t' structure */
 #define get_el3state_ctx(h)	(&((cpu_context_t *) h)->el3state_ctx)
+
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
+#define get_el2_sysregs_ctx(h)	(&((cpu_context_t *) h)->el2_sysregs_ctx)
+#else
 #define get_el1_sysregs_ctx(h)	(&((cpu_context_t *) h)->el1_sysregs_ctx)
-#if CTX_INCLUDE_EL2_REGS
-# define get_el2_sysregs_ctx(h)	(&((cpu_context_t *) h)->el2_sysregs_ctx)
 #endif
+
 #define get_gpregs_ctx(h)	(&((cpu_context_t *) h)->gpregs_ctx)
 #define get_cve_2018_3639_ctx(h)	(&((cpu_context_t *) h)->cve_2018_3639_ctx)
 
@@ -356,6 +373,27 @@
 void fpregs_context_restore(simd_regs_t *regs);
 #endif
 
+/*******************************************************************************
+ * The next four inline functions are required for IMAGE_BL1, as well as for
+ * IMAGE_BL31 for the below combinations.
+ * ============================================================================
+ * | ERRATA_SPECULATIVE_AT| CTX_INCLUDE_EL2_REGS |   Combination              |
+ * ============================================================================
+ * |       0              |       0              |   Valid (EL1 ctx)          |
+ * |______________________|______________________|____________________________|
+ * |                      |                      | Invalid (No Errata/EL1 Ctx)|
+ * |       0              |       1              | Hence compiled out.        |
+ * |______________________|______________________|____________________________|
+ * |                      |                      |                            |
+ * |       1              |       0              |   Valid (Errata ctx)       |
+ * |______________________|______________________|____________________________|
+ * |                      |                      |                            |
+ * |       1              |       1              |   Valid (Errata ctx)       |
+ * |______________________|______________________|____________________________|
+ * ============================================================================
+ ******************************************************************************/
+#if (IMAGE_BL1 || ((ERRATA_SPECULATIVE_AT) || (!CTX_INCLUDE_EL2_REGS)))
+
 static inline void write_ctx_sctlr_el1_reg_errata(cpu_context_t *ctx, u_register_t val)
 {
 #if (ERRATA_SPECULATIVE_AT)
@@ -396,6 +434,8 @@
 #endif /* ERRATA_SPECULATIVE_AT */
 }
 
+#endif /* (IMAGE_BL1 || ((ERRATA_SPECULATIVE_AT) || (!CTX_INCLUDE_EL2_REGS))) */
+
 #endif /* __ASSEMBLER__ */
 
 #endif /* CONTEXT_H */
diff --git a/include/lib/el3_runtime/context_el2.h b/include/lib/el3_runtime/context_el2.h
index ca1ea4e..14c1fb6 100644
--- a/include/lib/el3_runtime/context_el2.h
+++ b/include/lib/el3_runtime/context_el2.h
@@ -13,7 +13,6 @@
  * AArch64 EL2 system register context structure for preserving the
  * architectural state during world switches.
  ******************************************************************************/
-#if CTX_INCLUDE_EL2_REGS
 typedef struct el2_common_regs {
 	uint64_t actlr_el2;
 	uint64_t afsr0_el2;
@@ -359,7 +358,6 @@
 #define write_el2_ctx_mpam(ctx, reg, val)
 #endif /* CTX_INCLUDE_MPAM_REGS */
 
-#endif /* CTX_INCLUDE_EL2_REGS */
 /******************************************************************************/
 
 #endif /* __ASSEMBLER__ */
diff --git a/include/lib/el3_runtime/context_mgmt.h b/include/lib/el3_runtime/context_mgmt.h
index b7b73e6..70dbd46 100644
--- a/include/lib/el3_runtime/context_mgmt.h
+++ b/include/lib/el3_runtime/context_mgmt.h
@@ -47,13 +47,14 @@
 void cm_handle_asymmetric_features(void);
 #endif
 
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 void cm_el2_sysregs_context_save(uint32_t security_state);
 void cm_el2_sysregs_context_restore(uint32_t security_state);
-#endif
-
+#else
 void cm_el1_sysregs_context_save(uint32_t security_state);
 void cm_el1_sysregs_context_restore(uint32_t security_state);
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
+
 void cm_set_elr_el3(uint32_t security_state, uintptr_t entrypoint);
 void cm_set_elr_spsr_el3(uint32_t security_state,
 			uintptr_t entrypoint, uint32_t spsr);
diff --git a/lib/el3_runtime/aarch64/context_debug.c b/lib/el3_runtime/aarch64/context_debug.c
index 9ffa297..0a14e1f 100644
--- a/lib/el3_runtime/aarch64/context_debug.c
+++ b/lib/el3_runtime/aarch64/context_debug.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2023-2024, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -28,19 +28,11 @@
 	return state_names[security_state_idx];
 }
 
-#if CTX_INCLUDE_EL2_REGS
 #define PRINT_MEM_USAGE_SEPARATOR()					\
 	do {								\
-		printf("+-----------+-----------+-----------"		\
-			"+-----------+-----------+-----------+\n");	\
-	} while (false)
-#else
-#define PRINT_MEM_USAGE_SEPARATOR()					\
-	do {								\
 		printf("+-----------+-----------"			\
 		"+-----------+-----------+-----------+\n");		\
 	} while (false)
-#endif /* CTX_INCLUDE_EL2_REGS */
 
 #define NAME_PLACEHOLDER_LEN 14
 
@@ -60,8 +52,9 @@
 	size_t el3_total = 0U;
 #if CTX_INCLUDE_EL2_REGS
 	size_t el2_total = 0U;
-#endif /* CTX_INCLUDE_EL2_REGS */
+#else
 	size_t el1_total = 0U;
+#endif /* CTX_INCLUDE_EL2_REGS */
 	size_t other_total = 0U;
 	size_t total = 0U;
 	size_t per_world_ctx_size = 0U;
@@ -70,8 +63,10 @@
 	printf("|    Core   |    EL3    ");
 #if CTX_INCLUDE_EL2_REGS
 	printf("|    EL2    ");
+#else
+	printf("|    EL1    ");
 #endif /* CTX_INCLUDE_EL2_REGS */
-	printf("|    EL1    |   Other   |   Total   |\n");
+	printf("|   Other   |   Total   |\n");
 
 	/* Compute memory usage for each core's context */
 	for (unsigned int i = 0U; i < PLATFORM_CORE_COUNT; i++) {
@@ -79,8 +74,9 @@
 		size_t el3_size = 0U;
 #if CTX_INCLUDE_EL2_REGS
 		size_t el2_size = 0U;
-#endif /* CTX_INCLUDE_EL2_REGS */
+#else
 		size_t el1_size = 0U;
+#endif /* CTX_INCLUDE_EL2_REGS */
 
 		PRINT_MEM_USAGE_SEPARATOR();
 		cpu_context_t *ctx = (cpu_context_t *)cm_get_context_by_index(i,
@@ -89,22 +85,26 @@
 		el3_size = sizeof(ctx->el3state_ctx);
 #if CTX_INCLUDE_EL2_REGS
 		el2_size = sizeof(ctx->el2_sysregs_ctx);
-#endif /* CTX_INCLUDE_EL2_REGS */
+#else
 		el1_size = sizeof(ctx->el1_sysregs_ctx);
-
-		size_other = core_total - el3_size - el1_size;
+#endif /* CTX_INCLUDE_EL2_REGS */
+		size_other = core_total - el3_size;
 		printf("| %9u | %8luB ", i, el3_size);
 #if CTX_INCLUDE_EL2_REGS
 		size_other -= el2_size;
 		printf("| %8luB ", el2_size);
+#else
+		size_other -= el1_size;
+		printf("| %8luB ", el1_size);
 #endif /* CTX_INCLUDE_EL2_REGS */
-		printf("| %8luB | %8luB | %8luB |\n", el1_size, size_other, core_total);
+		printf("| %8luB | %8luB |\n", size_other, core_total);
 
 		el3_total += el3_size;
 #if CTX_INCLUDE_EL2_REGS
 		el2_total += el2_size;
-#endif /* CTX_INCLUDE_EL2_REGS */
+#else
 		el1_total += el1_size;
+#endif /* CTX_INCLUDE_EL2_REGS */
 		other_total += size_other;
 		total += core_total;
 	}
@@ -113,8 +113,10 @@
 	printf("|    All    | %8luB ", el3_total);
 #if CTX_INCLUDE_EL2_REGS
 	printf("| %8luB ", el2_total);
+#else
+	printf("| %8luB ", el1_total);
 #endif /* CTX_INCLUDE_EL2_REGS */
-	printf("| %8luB | %8luB | %8luB |\n", el1_total, other_total, total);
+	printf("| %8luB | %8luB |\n", other_total, total);
 	PRINT_MEM_USAGE_SEPARATOR();
 	printf("\n");
 
@@ -146,18 +148,10 @@
 
 		printf("Memory usage for %s:\n", context_name);
 		total += report_allocated_memory(i);
-			printf("------------------------"
-#if CTX_INCLUDE_EL2_REGS
-				"------"
-#endif /* CTX_INCLUDE_EL2_REGS */
-			      );
+			printf("------------------------");
 			len = NAME_PLACEHOLDER_LEN - printf("End %s", context_name);
 			PRINT_DASH(len);
-			printf(
-#if CTX_INCLUDE_EL2_REGS
-				"------"
-#endif /* CTX_INCLUDE_EL2_REGS */
-				"-----------------------\n\n");
+			printf("-----------------------\n\n");
 	}
 
 	printf("Total context memory allocated: %luB\n\n", total);
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index cde86d6..6f3b51a 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -51,6 +51,7 @@
 static void manage_extensions_secure(cpu_context_t *ctx);
 static void manage_extensions_secure_per_world(void);
 
+#if ((IMAGE_BL1) || (IMAGE_BL31 && (!CTX_INCLUDE_EL2_REGS)))
 static void setup_el1_context(cpu_context_t *ctx, const struct entry_point_info *ep)
 {
 	u_register_t sctlr_elx, actlr_elx;
@@ -108,6 +109,7 @@
 	actlr_elx = read_actlr_el1();
 	write_el1_ctx_common(get_el1_sysregs_ctx(ctx), actlr_el1, actlr_elx);
 }
+#endif /* (IMAGE_BL1) || (IMAGE_BL31 && (!CTX_INCLUDE_EL2_REGS)) */
 
 /******************************************************************************
  * This function performs initializations that are specific to SECURE state
@@ -140,7 +142,7 @@
 	 * Initialize EL1 context registers unless SPMC is running
 	 * at S-EL2.
 	 */
-#if !SPMD_SPM_AT_SEL2
+#if (!SPMD_SPM_AT_SEL2)
 	setup_el1_context(ctx, ep);
 #endif
 
@@ -156,7 +158,6 @@
 	if (!has_secure_perworld_init) {
 		manage_extensions_secure_per_world();
 	}
-
 }
 
 #if ENABLE_RME
@@ -260,11 +261,8 @@
 #endif
 	write_ctx_reg(state, CTX_SCR_EL3, scr_el3);
 
-	/* Initialize EL1 context registers */
-	setup_el1_context(ctx, ep);
-
 	/* Initialize EL2 context registers */
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 
 	/*
 	 * Initialize SCTLR_EL2 context register with reset value.
@@ -297,8 +295,10 @@
 		write_el2_ctx_fgt(get_el2_sysregs_ctx(ctx), hfgwtr_el2,
 			HFGWTR_EL2_INIT_VAL);
 	}
-
-#endif /* CTX_INCLUDE_EL2_REGS */
+#else
+	/* Initialize EL1 context registers */
+	setup_el1_context(ctx, ep);
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 
 	manage_extensions_nonsecure(ctx);
 }
@@ -329,7 +329,7 @@
 	 * to boot correctly. However, there are very few registers where this
 	 * is not true and some values need to be recreated.
 	 */
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 	el2_sysregs_t *el2_ctx = get_el2_sysregs_ctx(ctx);
 
 	/*
@@ -345,7 +345,7 @@
 	 * and it may contain access control bits (e.g. CLUSTERPMUEN bit).
 	 */
 	write_el2_ctx_common(el2_ctx, actlr_el2, read_actlr_el2());
-#endif /* CTX_INCLUDE_EL2_REGS */
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 
 	/* Start with a clean SCR_EL3 copy as all relevant values are set */
 	scr_el3 = SCR_RESET_VAL;
@@ -1087,11 +1087,14 @@
 			}
 		}
 	}
+#if (!CTX_INCLUDE_EL2_REGS)
+	/* Restore EL1 system registers, only when CTX_INCLUDE_EL2_REGS=0 */
 	cm_el1_sysregs_context_restore(security_state);
+#endif
 	cm_set_next_eret_context(security_state);
 }
 
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 
 static void el2_sysregs_context_save_fgt(el2_sysregs_t *ctx)
 {
@@ -1519,7 +1522,7 @@
 		write_gcspr_el2(read_el2_ctx_gcs(el2_sysregs_ctx, gcspr_el2));
 	}
 }
-#endif /* CTX_INCLUDE_EL2_REGS */
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 
 #if IMAGE_BL31
 /*********************************************************************************
@@ -1580,7 +1583,7 @@
 	cm_handle_asymmetric_features();
 #endif
 
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 #if ENABLE_ASSERTIONS
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
 	assert(ctx != NULL);
@@ -1591,15 +1594,19 @@
 			(el_implemented(2U) != EL_IMPL_NONE));
 #endif /* ENABLE_ASSERTIONS */
 
-	/* Restore EL2 and EL1 sysreg contexts */
+	/* Restore EL2 sysreg contexts */
 	cm_el2_sysregs_context_restore(NON_SECURE);
-	cm_el1_sysregs_context_restore(NON_SECURE);
 	cm_set_next_eret_context(NON_SECURE);
 #else
 	cm_prepare_el3_exit(NON_SECURE);
-#endif /* CTX_INCLUDE_EL2_REGS */
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 }
 
+#if ((IMAGE_BL1) || (IMAGE_BL31 && (!CTX_INCLUDE_EL2_REGS)))
+/*******************************************************************************
+ * The next set of six functions are used by runtime services to save and restore
+ * EL1 context on the 'cpu_context' structure for the specified security state.
+ ******************************************************************************/
 static void el1_sysregs_context_save(el1_sysregs_t *ctx)
 {
 	write_el1_ctx_common(ctx, spsr_el1, read_spsr_el1());
@@ -1791,9 +1798,8 @@
 }
 
 /*******************************************************************************
- * The next four functions are used by runtime services to save and restore
- * EL1 context on the 'cpu_context' structure for the specified security
- * state.
+ * The next couple of functions are used by runtime services to save and restore
+ * EL1 context on the 'cpu_context' structure for the specified security state.
  ******************************************************************************/
 void cm_el1_sysregs_context_save(uint32_t security_state)
 {
@@ -1829,6 +1835,8 @@
 #endif
 }
 
+#endif /* ((IMAGE_BL1) || (IMAGE_BL31 && (!CTX_INCLUDE_EL2_REGS))) */
+
 /*******************************************************************************
  * This function populates ELR_EL3 member of 'cpu_context' pertaining to the
  * given security state with the given entrypoint
diff --git a/lib/extensions/pmuv3/aarch64/pmuv3.c b/lib/extensions/pmuv3/aarch64/pmuv3.c
index 71aa303..f9e32ca 100644
--- a/lib/extensions/pmuv3/aarch64/pmuv3.c
+++ b/lib/extensions/pmuv3/aarch64/pmuv3.c
@@ -23,13 +23,13 @@
 
 void pmuv3_enable(cpu_context_t *ctx)
 {
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 	u_register_t mdcr_el2_val;
 
 	mdcr_el2_val = read_el2_ctx_common(get_el2_sysregs_ctx(ctx), mdcr_el2);
 	mdcr_el2_val = init_mdcr_el2_hpmn(mdcr_el2_val);
 	write_el2_ctx_common(get_el2_sysregs_ctx(ctx), mdcr_el2, mdcr_el2_val);
-#endif /* CTX_INCLUDE_EL2_REGS */
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 }
 
 static u_register_t mtpmu_disable_el3(u_register_t mdcr_el3)
diff --git a/plat/arm/board/neoverse_rd/common/ras/nrd_ras_cpu.c b/plat/arm/board/neoverse_rd/common/ras/nrd_ras_cpu.c
index a888df5..dcee92c 100644
--- a/plat/arm/board/neoverse_rd/common/ras/nrd_ras_cpu.c
+++ b/plat/arm/board/neoverse_rd/common/ras/nrd_ras_cpu.c
@@ -92,7 +92,7 @@
 	cpu_info->ErrCtxEl1Reg[16] = read_el1_ctx_common(get_el1_sysregs_ctx(ctx),
 						  ttbr1_el1);
 
-#if CTX_INCLUDE_EL2_REGS
+#if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 	cpu_info->ErrCtxEl2Reg[0]   = read_el2_ctx_common(get_el2_sysregs_ctx(ctx),
 						elr_el2);
 	cpu_info->ErrCtxEl2Reg[1]   = read_el2_ctx_common(get_el2_sysregs_ctx(ctx),
@@ -125,7 +125,7 @@
 						vttbr_el2);
 	cpu_info->ErrCtxEl2Reg[15]  = read_el2_ctx_common(get_el2_sysregs_ctx(ctx),
 						esr_el2);
-#endif /* CTX_INCLUDE_EL2_REGS */
+#endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 
 	cpu_info->ErrCtxEl3Reg[0]   = read_ctx_reg(get_el3state_ctx(ctx),
 						   CTX_ELR_EL3);