refactor(cm): move EL3 registers to global context

Currently, EL3 context registers are duplicated per-world per-CPU.
Some registers, such as CPTR_EL3 and ZCR_EL3, hold the same value
on every CPU, so this patch moves them out of the per-CPU context
into a per-world context to reduce memory usage.

Change-Id: I91294e3d5f4af21a58c23599af2bdbd2a747c54a
Signed-off-by: Elizabeth Ho <elizabeth.ho@arm.com>
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 53bdb55..cb9a0f2 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -180,15 +180,6 @@
  */
 void amu_enable(cpu_context_t *ctx)
 {
-	/*
-	 * Set CPTR_EL3.TAM to zero so that any accesses to the Activity Monitor
-	 * registers do not trap to EL3.
-	 */
-	u_register_t cptr_el3 = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
-	cptr_el3 &= ~TAM_BIT;
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, cptr_el3);
-
 	/* Initialize FEAT_AMUv1p1 features if present. */
 	if (is_feat_amuv1p1_supported()) {
 		/*
@@ -199,6 +190,18 @@
 	}
 }
 
+void amu_enable_per_world(per_world_context_t *per_world_ctx)
+{
+	/*
+	 * Clear CPTR_EL3.TAM in the per-world context so that accesses to the
+	 * Activity Monitor registers do not trap to EL3.
+	 */
+	uint64_t cptr_el3 = per_world_ctx->ctx_cptr_el3;
+
+	cptr_el3 &= ~TAM_BIT;
+	per_world_ctx->ctx_cptr_el3 = cptr_el3;
+}
+
 void amu_init_el3(void)
 {
 	uint64_t group0_impl_ctr = read_amcgcr_el0_cg0nc();
diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c
index d705b64..b1409b9 100644
--- a/lib/extensions/sme/sme.c
+++ b/lib/extensions/sme/sme.c
@@ -22,17 +22,22 @@
 	/* Get the context state. */
 	state = get_el3state_ctx(context);
 
-	/* Enable SME in CPTR_EL3. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
-	reg |= ESM_BIT;
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
-
 	/* Set the ENTP2 bit in SCR_EL3 to enable access to TPIDR2_EL0. */
 	reg = read_ctx_reg(state, CTX_SCR_EL3);
 	reg |= SCR_ENTP2_BIT;
 	write_ctx_reg(state, CTX_SCR_EL3, reg);
 }
 
+void sme_enable_per_world(per_world_context_t *per_world_ctx)
+{
+	u_register_t reg;
+
+	/* Set ESM in the per-world CPTR_EL3 value to enable SME. */
+	reg = per_world_ctx->ctx_cptr_el3;
+	reg |= ESM_BIT;
+	per_world_ctx->ctx_cptr_el3 = reg;
+}
+
 void sme_init_el3(void)
 {
 	u_register_t cptr_el3 = read_cptr_el3();
@@ -43,7 +48,7 @@
 	isb();
 
 	/*
-	 * Set the max LEN value and FA64 bit. This register is set up globally
+	 * Set the max LEN value and FA64 bit. This register is set up per world
 	 * to be the least restrictive, then lower ELs can restrict as needed
 	 * using SMCR_EL2 and SMCR_EL1.
 	 */
@@ -87,15 +92,20 @@
 	/* Get the context state. */
 	state = get_el3state_ctx(context);
 
-	/* Disable SME, SVE, and FPU since they all share registers. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
-	reg &= ~ESM_BIT;	/* Trap SME */
-	reg &= ~CPTR_EZ_BIT;	/* Trap SVE */
-	reg |= TFP_BIT;		/* Trap FPU/SIMD */
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
-
 	/* Disable access to TPIDR2_EL0. */
 	reg = read_ctx_reg(state, CTX_SCR_EL3);
 	reg &= ~SCR_ENTP2_BIT;
 	write_ctx_reg(state, CTX_SCR_EL3, reg);
 }
+
+void sme_disable_per_world(per_world_context_t *per_world_ctx)
+{
+	u_register_t reg;
+
+	/* Trap SME, SVE and FPU/SIMD together since they share registers. */
+	reg = per_world_ctx->ctx_cptr_el3;
+	reg &= ~ESM_BIT;	/* Trap SME */
+	reg &= ~CPTR_EZ_BIT;	/* Trap SVE */
+	reg |= TFP_BIT;		/* Trap FPU/SIMD */
+	per_world_ctx->ctx_cptr_el3 = reg;
+}
diff --git a/lib/extensions/sve/sve.c b/lib/extensions/sve/sve.c
index eb4ac8d..143717e 100644
--- a/lib/extensions/sve/sve.c
+++ b/lib/extensions/sve/sve.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -22,19 +22,17 @@
  */
 #define CONVERT_SVE_LENGTH(x)	(((x / 128) - 1))
 
-void sve_enable(cpu_context_t *context)
+void sve_enable_per_world(per_world_context_t *per_world_ctx)
 {
 	u_register_t cptr_el3;
 
-	cptr_el3 = read_ctx_reg(get_el3state_ctx(context), CTX_CPTR_EL3);
-
 	/* Enable access to SVE functionality for all ELs. */
+	cptr_el3 = per_world_ctx->ctx_cptr_el3;	/* Per-world CPTR_EL3 value */
 	cptr_el3 = (cptr_el3 | CPTR_EZ_BIT) & ~(TFP_BIT);
-	write_ctx_reg(get_el3state_ctx(context), CTX_CPTR_EL3, cptr_el3);
+	per_world_ctx->ctx_cptr_el3 = cptr_el3;
 
 	/* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
-	write_ctx_reg(get_el3state_ctx(context), CTX_ZCR_EL3,
-		(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN)));
+	per_world_ctx->ctx_zcr_el3 = (ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
 }
 
 void sve_init_el2_unused(void)
@@ -47,17 +45,13 @@
 	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TFP_BIT);
 }
 
-void sve_disable(cpu_context_t *context)
+void sve_disable_per_world(per_world_context_t *per_world_ctx)
 {
 	u_register_t reg;
-	el3_state_t *state;
-
-	/* Get the context state. */
-	state = get_el3state_ctx(context);
 
 	/* Disable SVE and FPU since they share registers. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
+	reg = per_world_ctx->ctx_cptr_el3;	/* Per-world CPTR_EL3 value */
 	reg &= ~CPTR_EZ_BIT;	/* Trap SVE */
 	reg |= TFP_BIT;		/* Trap FPU/SIMD */
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
+	per_world_ctx->ctx_cptr_el3 = reg;
 }
diff --git a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
index 1349566..2170763 100644
--- a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
+++ b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
@@ -10,29 +10,27 @@
 #include <arch_helpers.h>
 #include <lib/extensions/sys_reg_trace.h>
 
-void sys_reg_trace_enable(cpu_context_t *ctx)
+void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx)
 {
 	/*
 	 * CPTR_EL3.TTA: Set to zero so that System register accesses to the
 	 *  trace registers do not trap to EL3.
 	 */
-	uint64_t val = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
+	uint64_t val = per_world_ctx->ctx_cptr_el3;	/* Per-world CPTR_EL3 value */
 	val &= ~(TTA_BIT);
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, val);
+	per_world_ctx->ctx_cptr_el3 = val;
 }
 
-void sys_reg_trace_disable(cpu_context_t *ctx)
+void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx)
 {
 	/*
 	 * CPTR_EL3.TTA: Set to one so that System register accesses to the
 	 *  trace registers trap to EL3, unless it is trapped by CPACR.TRCDIS,
 	 *  CPACR_EL1.TTA, or CPTR_EL2.TTA
 	 */
-	uint64_t val = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
+	uint64_t val = per_world_ctx->ctx_cptr_el3;	/* Per-world CPTR_EL3 value */
 	val |= TTA_BIT;
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, val);
+	per_world_ctx->ctx_cptr_el3 = val;
 }
 
 void sys_reg_trace_init_el2_unused(void)