refactor(cm): move EL3 registers to global context

Currently, EL3 context registers are duplicated per-world per-cpu.
Some registers have the same value across all CPUs, so this patch
moves these registers out into a per-world context to reduce
memory usage.

Change-Id: I91294e3d5f4af21a58c23599af2bdbd2a747c54a
Signed-off-by: Elizabeth Ho <elizabeth.ho@arm.com>
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
diff --git a/bl31/aarch64/runtime_exceptions.S b/bl31/aarch64/runtime_exceptions.S
index 7336b91..8298696 100644
--- a/bl31/aarch64/runtime_exceptions.S
+++ b/bl31/aarch64/runtime_exceptions.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2023, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -401,7 +401,7 @@
 
 #if ENABLE_RME
 	/* Copy SCR_EL3.NSE bit to the flag to indicate caller's security */
-	ubfx	x7, x18, #SCR_NSE_SHIFT, 1
+	ubfx	x7, x18, #SCR_NSE_SHIFT, #1
 
 	/*
 	 * Shift copied SCR_EL3.NSE bit by 5 to create space for
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index cae55f3..925c6a6 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -121,6 +121,9 @@
 	/* Init registers that never change for the lifetime of TF-A */
 	cm_manage_extensions_el3();
 
+	/* Init per-world context registers for non-secure world */
+	manage_extensions_nonsecure_per_world();
+
 	NOTICE("BL31: %s\n", version_string);
 	NOTICE("BL31: %s\n", build_message);
 
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 536d807..9c9c00f 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -441,4 +441,20 @@
 #endif
 	.endm
 
+/* -----------------------------------------------------------------
+ * Turn the SCR_EL3 value held in _scr_reg into a security state in
+ * _ret: 2 when the NSE bit is set, otherwise bit 0 of _scr_reg.
+ * ------------------------------------------------------------------
+ */
+	.macro get_security_state _ret:req, _scr_reg:req
+		ubfx	\_ret, \_scr_reg, #SCR_NSE_SHIFT, #1	/* _ret = NSE bit */
+		cmp	\_ret, #1
+		beq	realm_state_\@	/* labels are unique per expansion */
+		bfi	\_ret, \_scr_reg, #0, #1	/* _ret must differ from _scr_reg */
+		b	end_\@
+	realm_state_\@:
+		mov	\_ret, #2
+	end_\@:
+	.endm
+
 #endif /* EL3_COMMON_MACROS_S */
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index e7e9f58..470d113 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2022, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,7 @@
 #ifndef CONTEXT_H
 #define CONTEXT_H
 
+#include <lib/el3_runtime/cpu_data.h>
 #include <lib/utils_def.h>
 
 /*******************************************************************************
@@ -61,10 +62,8 @@
 #define CTX_ELR_EL3		U(0x20)
 #define CTX_PMCR_EL0		U(0x28)
 #define CTX_IS_IN_EL3		U(0x30)
-#define CTX_CPTR_EL3		U(0x38)
-#define CTX_ZCR_EL3		U(0x40)
-#define CTX_MPAM3_EL3		U(0x48)
-#define CTX_EL3STATE_END	U(0x50) /* Align to the next 16 byte boundary */
+#define CTX_MPAM3_EL3		U(0x38)
+#define CTX_EL3STATE_END	U(0x40) /* Align to the next 16 byte boundary */
 
 /*******************************************************************************
  * Constants that allow assembler code to access members of and the
@@ -324,6 +323,13 @@
 #define CTX_PAUTH_REGS_END	U(0)
 #endif /* CTX_INCLUDE_PAUTH_REGS */
 
+/*******************************************************************************
+ * Registers initialised in a per-world context.
+ ******************************************************************************/
+#define CTX_CPTR_EL3		U(0x0)
+#define CTX_ZCR_EL3		U(0x8)
+#define CTX_GLOBAL_EL3STATE_END	U(0x10)
+
 #ifndef __ASSEMBLER__
 
 #include <stdint.h>
@@ -434,6 +440,17 @@
 #endif
 } cpu_context_t;
 
+/*
+ * Per-world context: EL3 register values shared by every CPU of a world.
+ * Field order must match the CTX_CPTR_EL3 / CTX_ZCR_EL3 offsets above.
+ */
+typedef struct per_world_context {
+	uint64_t ctx_cptr_el3;	/* at offset CTX_CPTR_EL3 (0x0) */
+	uint64_t ctx_zcr_el3;	/* at offset CTX_ZCR_EL3 (0x8) */
+} per_world_context_t;
+
+extern per_world_context_t per_world_context[CPU_DATA_CONTEXT_NUM];
+
 /* Macros to access members of the 'cpu_context_t' structure */
 #define get_el3state_ctx(h)	(&((cpu_context_t *) h)->el3state_ctx)
 #if CTX_INCLUDE_FPREGS
diff --git a/include/lib/el3_runtime/context_mgmt.h b/include/lib/el3_runtime/context_mgmt.h
index aa76f3b..b2bdaf5 100644
--- a/include/lib/el3_runtime/context_mgmt.h
+++ b/include/lib/el3_runtime/context_mgmt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2022, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -39,6 +39,7 @@
 #ifdef __aarch64__
 #if IMAGE_BL31
 void cm_manage_extensions_el3(void);
+void manage_extensions_nonsecure_per_world(void);
 #endif
 #if CTX_INCLUDE_EL2_REGS
 void cm_el2_sysregs_context_save(uint32_t security_state);
@@ -88,6 +89,7 @@
 void *cm_get_next_context(void);
 void cm_set_next_context(void *context);
 static inline void cm_manage_extensions_el3(void) {}
+static inline void manage_extensions_nonsecure_per_world(void) {}
 #endif /* __aarch64__ */
 
 #endif /* CONTEXT_MGMT_H */
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index 09d8dee..a396b99 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -19,9 +19,11 @@
 void amu_enable(cpu_context_t *ctx);
 void amu_init_el3(void);
 void amu_init_el2_unused(void);
+void amu_enable_per_world(per_world_context_t *per_world_ctx);
 #else
 void amu_enable(bool el2_unused);
-#endif
+#endif /* __aarch64__ */
+
 #else
 #if __aarch64__
 void amu_enable(cpu_context_t *ctx)
@@ -33,12 +35,15 @@
 void amu_init_el2_unused(void)
 {
 }
+/* Stub for builds without AMU support; static inline so that including
+ * this header from several translation units cannot clash at link time. */
+static inline void amu_enable_per_world(per_world_context_t *per_world_ctx) {}
 #else
 static inline void amu_enable(bool el2_unused)
 {
 }
-#endif
-#endif
+#endif /*__aarch64__ */
+#endif /* ENABLE_FEAT_AMU */
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 /*
diff --git a/include/lib/extensions/sme.h b/include/lib/extensions/sme.h
index dbefdfc..bd7948e 100644
--- a/include/lib/extensions/sme.h
+++ b/include/lib/extensions/sme.h
@@ -21,23 +21,31 @@
 #define SME_SMCR_LEN_MAX	U(0x1FF)
 
 #if ENABLE_SME_FOR_NS
-void sme_enable(cpu_context_t *context);
 void sme_init_el3(void);
 void sme_init_el2_unused(void);
+void sme_enable(cpu_context_t *context);
 void sme_disable(cpu_context_t *context);
+void sme_enable_per_world(per_world_context_t *per_world_ctx);
+void sme_disable_per_world(per_world_context_t *per_world_ctx);
 #else
-static inline void sme_enable(cpu_context_t *context)
-{
-}
 static inline void sme_init_el3(void)
 {
 }
 static inline void sme_init_el2_unused(void)
 {
 }
+static inline void sme_enable(cpu_context_t *context)
+{
+}
 static inline void sme_disable(cpu_context_t *context)
 {
 }
+static inline void sme_enable_per_world(per_world_context_t *per_world_ctx)
+{
+}
+static inline void sme_disable_per_world(per_world_context_t *per_world_ctx)
+{
+}
 #endif /* ENABLE_SME_FOR_NS */
 
 #endif /* SME_H */
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index fc76a16..947c905 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -10,17 +10,17 @@
 #include <context.h>
 
 #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
-void sve_enable(cpu_context_t *context);
 void sve_init_el2_unused(void);
-void sve_disable(cpu_context_t *context);
+void sve_enable_per_world(per_world_context_t *per_world_ctx);
+void sve_disable_per_world(per_world_context_t *per_world_ctx);
 #else
-static inline void sve_enable(cpu_context_t *context)
+static inline void sve_init_el2_unused(void)
 {
 }
-static inline void sve_init_el2_unused(void)
+static inline void sve_enable_per_world(per_world_context_t *per_world_ctx)
 {
 }
-static inline void sve_disable(cpu_context_t *context)
+static inline void sve_disable_per_world(per_world_context_t *per_world_ctx)
 {
 }
 #endif /* ( ENABLE_SME_FOR_NS | ENABLE_SVE_FOR_NS ) */
diff --git a/include/lib/extensions/sys_reg_trace.h b/include/lib/extensions/sys_reg_trace.h
index beda88a..7004267 100644
--- a/include/lib/extensions/sys_reg_trace.h
+++ b/include/lib/extensions/sys_reg_trace.h
@@ -12,8 +12,8 @@
 #if ENABLE_SYS_REG_TRACE_FOR_NS
 
 #if __aarch64__
-void sys_reg_trace_enable(cpu_context_t *context);
-void sys_reg_trace_disable(cpu_context_t *context);
+void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx);
+void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx);
 void sys_reg_trace_init_el2_unused(void);
 #else
 void sys_reg_trace_init_el3(void);
@@ -22,10 +22,10 @@
 #else /* !ENABLE_SYS_REG_TRACE_FOR_NS */
 
 #if __aarch64__
-static inline void sys_reg_trace_enable(cpu_context_t *context)
+static inline void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx)
 {
 }
-static inline void sys_reg_trace_disable(cpu_context_t *context)
+static inline void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx)
 {
 }
 static inline void sys_reg_trace_init_el2_unused(void)
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 758355a..41b25d6 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -570,6 +570,25 @@
 	ret
 endfunc save_and_update_ptw_el1_sys_regs
 
+/* -----------------------------------------------------------------
+ * Return, in the register argument, the address of the per_world_context
+ * entry for the security state that "get_security_state" derives from
+ * the SCR_EL3 value saved in the context that SP points to.
+ * Clobbers: x9, x10
+ * ------------------------------------------------------------------
+ */
+
+.macro get_per_world_context _reg:req
+	ldr 	x10, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]	/* saved SCR_EL3 */
+	get_security_state x9, x10
+	mov_imm	x10, (CTX_GLOBAL_EL3STATE_END - CTX_CPTR_EL3)
+	mul	x9, x9, x10	/* byte offset of the selected entry */
+	adrp	x10, per_world_context
+	add	x10, x10, :lo12:per_world_context	/* x10 = array base */
+	add	x9, x9, x10	/* x9 = address of the entry */
+	mov 	\_reg, x9
+.endm
+
 /* ------------------------------------------------------------------
  * This routine assumes that the SP_EL3 is pointing to a valid
  * context structure from where the gp regs and other special
@@ -600,7 +619,11 @@
 	 * Synchronization is required before zcr_el3 is addressed.
 	 * ----------------------------------------------------------
 	 */
-	ldp	x19, x20, [sp, #CTX_EL3STATE_OFFSET + CTX_CPTR_EL3]
+
+	/* The address of the per_world context is stored in x9 */
+	get_per_world_context x9
+
+	ldp	x19, x20, [x9, #CTX_CPTR_EL3]
 	msr	cptr_el3, x19
 
 #if IMAGE_BL31
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 98cee16..6231898 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -20,6 +20,7 @@
 #include <context.h>
 #include <drivers/arm/gicv3.h>
 #include <lib/el3_runtime/context_mgmt.h>
+#include <lib/el3_runtime/cpu_data.h>
 #include <lib/el3_runtime/pubsub_events.h>
 #include <lib/extensions/amu.h>
 #include <lib/extensions/brbe.h>
@@ -38,8 +39,12 @@
 CASSERT(((TWED_DELAY & ~SCR_TWEDEL_MASK) == 0U), assert_twed_delay_value_check);
 #endif /* ENABLE_FEAT_TWED */
 
+per_world_context_t per_world_context[CPU_DATA_CONTEXT_NUM];
+static bool has_secure_perworld_init;
+
 static void manage_extensions_nonsecure(cpu_context_t *ctx);
 static void manage_extensions_secure(cpu_context_t *ctx);
+static void manage_extensions_secure_per_world(void);
 
 static void setup_el1_context(cpu_context_t *ctx, const struct entry_point_info *ep)
 {
@@ -146,6 +151,18 @@
 #endif
 
 	manage_extensions_secure(ctx);
+
+	/*
+	 * manage_extensions_secure_per_world() only needs to run once: the
+	 * registers it initialises hold the same value on every CPU for the
+	 * secure world, so later calls from other cores are skipped.
+	 * NOTE(review): has_secure_perworld_init is a plain bool with no
+	 * locking - confirm this path cannot run on several cores at once.
+	 */
+	if (!has_secure_perworld_init) {
+		manage_extensions_secure_per_world();
+	}
+
 }
 
 #if ENABLE_RME
@@ -301,7 +318,6 @@
  ******************************************************************************/
 static void setup_context_common(cpu_context_t *ctx, const entry_point_info_t *ep)
 {
-	u_register_t cptr_el3;
 	u_register_t scr_el3;
 	el3_state_t *state;
 	gp_regs_t *gp_regs;
@@ -425,21 +441,6 @@
 	}
 
 	/*
-	 * Initialise CPTR_EL3, setting all fields rather than relying on hw.
-	 * All fields are architecturally UNKNOWN on reset.
-	 *
-	 * CPTR_EL3.TFP: Set to zero so that accesses to the V- or Z- registers
-	 *  by Advanced SIMD, floating-point or SVE instructions (if
-	 *  implemented) do not trap to EL3.
-	 *
-	 * CPTR_EL3.TCPAC: Set to zero so that accesses to CPACR_EL1,
-	 *  CPTR_EL2,CPACR, or HCPTR do not trap to EL3.
-	 */
-	cptr_el3 = CPTR_EL3_RESET_VAL & ~(TFP_BIT | TCPAC_BIT);
-
-	write_ctx_reg(state, CTX_CPTR_EL3, cptr_el3);
-
-	/*
 	 * SCR_EL3.HCE: Enable HVC instructions if next execution state is
 	 * AArch64 and next EL is EL2, or if next execution state is AArch32 and
 	 * next mode is Hyp.
@@ -600,28 +601,95 @@
 #endif /* IMAGE_BL31 */
 
 /*******************************************************************************
- * Enable architecture extensions on first entry to Non-secure world.
+ * Set up the Non-secure per_world_context: extension enables whose value is
+ * the same on every core. NOTE(review): CPTR_EL3 here appears to start from
+ * zero, not CPTR_EL3_RESET_VAL as the per-CPU code did - confirm intended.
  ******************************************************************************/
-static void manage_extensions_nonsecure(cpu_context_t *ctx)
-{
 #if IMAGE_BL31
-	if (is_feat_amu_supported()) {
-		amu_enable(ctx);
+void manage_extensions_nonsecure_per_world(void)
+{
+	if (is_feat_sme_supported()) {
+		sme_enable_per_world(&per_world_context[CPU_CONTEXT_NS]);
 	}
 
-	/* Enable SVE and FPU/SIMD */
 	if (is_feat_sve_supported()) {
-		sve_enable(ctx);
+		sve_enable_per_world(&per_world_context[CPU_CONTEXT_NS]);
+	}
+
+	if (is_feat_amu_supported()) {
+		amu_enable_per_world(&per_world_context[CPU_CONTEXT_NS]);
+	}
+
+	if (is_feat_sys_reg_trace_supported()) {
+		sys_reg_trace_enable_per_world(&per_world_context[CPU_CONTEXT_NS]);
 	}
+}
+#endif /* IMAGE_BL31 */
 
+/*******************************************************************************
+ * Initialise per_world_context for Secure world.
+ * This function enables the architecture extensions, which have same value
+ * across the cores for the secure world.
+ ******************************************************************************/
+
+static void manage_extensions_secure_per_world(void)
+{
+#if IMAGE_BL31
 	if (is_feat_sme_supported()) {
-		sme_enable(ctx);
+
+		if (ENABLE_SME_FOR_SWD) {
+		/*
+		 * Enable SME, SVE, FPU/SIMD in secure context, SPM must ensure
+		 * SME, SVE, and FPU/SIMD context properly managed.
+		 */
+			sme_enable_per_world(&per_world_context[CPU_CONTEXT_SECURE]);
+		} else {
+		/*
+		 * Disable SME, SVE, FPU/SIMD in secure context so non-secure
+		 * world can safely use the associated registers.
+		 */
+			sme_disable_per_world(&per_world_context[CPU_CONTEXT_SECURE]);
+		}
+	}
+	if (is_feat_sve_supported()) {
+		if (ENABLE_SVE_FOR_SWD) {
+		/*
+		 * Enable SVE and FPU in secure context, SPM must ensure
+		 * that the SVE and FPU register contexts are properly managed.
+		 */
+			sve_enable_per_world(&per_world_context[CPU_CONTEXT_SECURE]);
+		} else {
+		/*
+		 * Disable SVE and FPU in secure context so non-secure world
+		 * can safely use them.
+		 */
+			sve_disable_per_world(&per_world_context[CPU_CONTEXT_SECURE]);
+		}
 	}
 
+	/* NS can access this but Secure shouldn't */
 	if (is_feat_sys_reg_trace_supported()) {
-		sys_reg_trace_enable(ctx);
+		sys_reg_trace_disable_per_world(&per_world_context[CPU_CONTEXT_SECURE]);
 	}
 
+	has_secure_perworld_init = true;
+#endif /* IMAGE_BL31 */
+}
+
+/*******************************************************************************
+ * Enable architecture extensions on first entry to Non-secure world.
+ ******************************************************************************/
+static void manage_extensions_nonsecure(cpu_context_t *ctx)
+{
+#if IMAGE_BL31
+	if (is_feat_amu_supported()) {
+		amu_enable(ctx);
+	}
+
+	if (is_feat_sme_supported()) {
+		sme_enable(ctx);
+	}
+
 	if (is_feat_mpam_supported()) {
 		mpam_enable(ctx);
 	}
@@ -696,23 +764,6 @@
 static void manage_extensions_secure(cpu_context_t *ctx)
 {
 #if IMAGE_BL31
-	if (is_feat_sve_supported()) {
-		if (ENABLE_SVE_FOR_SWD) {
-		/*
-		 * Enable SVE and FPU in secure context, secure manager must
-		 * ensure that the SVE and FPU register contexts are properly
-		 * managed.
-		 */
-			sve_enable(ctx);
-		} else {
-		/*
-		 * Disable SVE and FPU in secure context so non-secure world
-		 * can safely use them.
-		 */
-			sve_disable(ctx);
-		}
-	}
-
 	if (is_feat_sme_supported()) {
 		if (ENABLE_SME_FOR_SWD) {
 		/*
@@ -729,11 +780,6 @@
 			sme_disable(ctx);
 		}
 	}
-
-	/* NS can access this but Secure shouldn't */
-	if (is_feat_sys_reg_trace_supported()) {
-		sys_reg_trace_disable(ctx);
-	}
 #endif /* IMAGE_BL31 */
 }
 
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 53bdb55..cb9a0f2 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -180,15 +180,6 @@
  */
 void amu_enable(cpu_context_t *ctx)
 {
-	/*
-	 * Set CPTR_EL3.TAM to zero so that any accesses to the Activity Monitor
-	 * registers do not trap to EL3.
-	 */
-	u_register_t cptr_el3 = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
-	cptr_el3 &= ~TAM_BIT;
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, cptr_el3);
-
 	/* Initialize FEAT_AMUv1p1 features if present. */
 	if (is_feat_amuv1p1_supported()) {
 		/*
@@ -199,6 +190,18 @@
 	}
 }
 
+void amu_enable_per_world(per_world_context_t *per_world_ctx)
+{
+	/*
+	 * Clear the TAM bit in this world's saved CPTR_EL3 value, so that
+	 * Activity Monitor register accesses are not trapped to EL3 once
+	 * the value is in effect.
+	 */
+	uint64_t untrapped = per_world_ctx->ctx_cptr_el3 & ~TAM_BIT;
+
+	per_world_ctx->ctx_cptr_el3 = untrapped;
+}
+
 void amu_init_el3(void)
 {
 	uint64_t group0_impl_ctr = read_amcgcr_el0_cg0nc();
diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c
index d705b64..b1409b9 100644
--- a/lib/extensions/sme/sme.c
+++ b/lib/extensions/sme/sme.c
@@ -22,17 +22,22 @@
 	/* Get the context state. */
 	state = get_el3state_ctx(context);
 
-	/* Enable SME in CPTR_EL3. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
-	reg |= ESM_BIT;
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
-
 	/* Set the ENTP2 bit in SCR_EL3 to enable access to TPIDR2_EL0. */
 	reg = read_ctx_reg(state, CTX_SCR_EL3);
 	reg |= SCR_ENTP2_BIT;
 	write_ctx_reg(state, CTX_SCR_EL3, reg);
 }
 
+void sme_enable_per_world(per_world_context_t *per_world_ctx)
+{
+	/*
+	 * Set ESM in the per-world copy of CPTR_EL3 to enable SME there.
+	 */
+	u_register_t cptr_el3 = per_world_ctx->ctx_cptr_el3 | ESM_BIT;
+
+	per_world_ctx->ctx_cptr_el3 = cptr_el3;
+}
+
 void sme_init_el3(void)
 {
 	u_register_t cptr_el3 = read_cptr_el3();
@@ -43,7 +48,7 @@
 	isb();
 
 	/*
-	 * Set the max LEN value and FA64 bit. This register is set up globally
+	 * Set the max LEN value and FA64 bit. This register is set up per_world
 	 * to be the least restrictive, then lower ELs can restrict as needed
 	 * using SMCR_EL2 and SMCR_EL1.
 	 */
@@ -87,15 +92,20 @@
 	/* Get the context state. */
 	state = get_el3state_ctx(context);
 
-	/* Disable SME, SVE, and FPU since they all share registers. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
-	reg &= ~ESM_BIT;	/* Trap SME */
-	reg &= ~CPTR_EZ_BIT;	/* Trap SVE */
-	reg |= TFP_BIT;		/* Trap FPU/SIMD */
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
-
 	/* Disable access to TPIDR2_EL0. */
 	reg = read_ctx_reg(state, CTX_SCR_EL3);
 	reg &= ~SCR_ENTP2_BIT;
 	write_ctx_reg(state, CTX_SCR_EL3, reg);
 }
+
+void sme_disable_per_world(per_world_context_t *per_world_ctx)
+{
+	u_register_t cptr_el3 = per_world_ctx->ctx_cptr_el3;
+
+	/* SME, SVE and FPU/SIMD share registers, so trap all three. */
+	cptr_el3 &= ~ESM_BIT;		/* SME */
+	cptr_el3 &= ~CPTR_EZ_BIT;	/* SVE */
+	cptr_el3 |= TFP_BIT;		/* FPU/SIMD */
+
+	per_world_ctx->ctx_cptr_el3 = cptr_el3;
+}
diff --git a/lib/extensions/sve/sve.c b/lib/extensions/sve/sve.c
index eb4ac8d..143717e 100644
--- a/lib/extensions/sve/sve.c
+++ b/lib/extensions/sve/sve.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -22,19 +22,17 @@
  */
 #define CONVERT_SVE_LENGTH(x)	(((x / 128) - 1))
 
-void sve_enable(cpu_context_t *context)
+void sve_enable_per_world(per_world_context_t *per_world_ctx)
 {
 	u_register_t cptr_el3;
 
-	cptr_el3 = read_ctx_reg(get_el3state_ctx(context), CTX_CPTR_EL3);
-
 	/* Enable access to SVE functionality for all ELs. */
+	cptr_el3 = per_world_ctx->ctx_cptr_el3;
 	cptr_el3 = (cptr_el3 | CPTR_EZ_BIT) & ~(TFP_BIT);
-	write_ctx_reg(get_el3state_ctx(context), CTX_CPTR_EL3, cptr_el3);
+	per_world_ctx->ctx_cptr_el3 = cptr_el3;
 
 	/* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
-	write_ctx_reg(get_el3state_ctx(context), CTX_ZCR_EL3,
-		(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN)));
+	per_world_ctx->ctx_zcr_el3 = (ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
 }
 
 void sve_init_el2_unused(void)
@@ -47,17 +45,13 @@
 	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TFP_BIT);
 }
 
-void sve_disable(cpu_context_t *context)
+void sve_disable_per_world(per_world_context_t *per_world_ctx)
 {
 	u_register_t reg;
-	el3_state_t *state;
-
-	/* Get the context state. */
-	state = get_el3state_ctx(context);
 
 	/* Disable SVE and FPU since they share registers. */
-	reg = read_ctx_reg(state, CTX_CPTR_EL3);
+	reg = per_world_ctx->ctx_cptr_el3;
 	reg &= ~CPTR_EZ_BIT;	/* Trap SVE */
 	reg |= TFP_BIT;		/* Trap FPU/SIMD */
-	write_ctx_reg(state, CTX_CPTR_EL3, reg);
+	per_world_ctx->ctx_cptr_el3 = reg;
 }
diff --git a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
index 1349566..2170763 100644
--- a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
+++ b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
@@ -10,29 +10,27 @@
 #include <arch_helpers.h>
 #include <lib/extensions/sys_reg_trace.h>
 
-void sys_reg_trace_enable(cpu_context_t *ctx)
+void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx)
 {
 	/*
 	 * CPTR_EL3.TTA: Set to zero so that System register accesses to the
 	 *  trace registers do not trap to EL3.
 	 */
-	uint64_t val = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
+	uint64_t val = per_world_ctx->ctx_cptr_el3;
 	val &= ~(TTA_BIT);
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, val);
+	per_world_ctx->ctx_cptr_el3 = val;
 }
 
-void sys_reg_trace_disable(cpu_context_t *ctx)
+void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx)
 {
 	/*
 	 * CPTR_EL3.TTA: Set to one so that System register accesses to the
 	 *  trace registers trap to EL3, unless it is trapped by CPACR.TRCDIS,
 	 *  CPACR_EL1.TTA, or CPTR_EL2.TTA
 	 */
-	uint64_t val = read_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3);
-
+	uint64_t val = per_world_ctx->ctx_cptr_el3;
 	val |= TTA_BIT;
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, val);
+	per_world_ctx->ctx_cptr_el3 = val;
 }
 
 void sys_reg_trace_init_el2_unused(void)
diff --git a/services/std_svc/rmmd/rmmd_main.c b/services/std_svc/rmmd/rmmd_main.c
index fa24a91..d6d25d8 100644
--- a/services/std_svc/rmmd/rmmd_main.c
+++ b/services/std_svc/rmmd/rmmd_main.c
@@ -17,6 +17,7 @@
 #include <common/runtime_svc.h>
 #include <context.h>
 #include <lib/el3_runtime/context_mgmt.h>
+#include <lib/el3_runtime/cpu_data.h>
 #include <lib/el3_runtime/pubsub.h>
 #include <lib/extensions/pmuv3.h>
 #include <lib/extensions/sys_reg_trace.h>
@@ -118,34 +119,48 @@
 /*******************************************************************************
  * Enable architecture extensions on first entry to Realm world.
  ******************************************************************************/
+
 static void manage_extensions_realm(cpu_context_t *ctx)
 {
+	pmuv3_enable(ctx);
+
+	/*
+	 * If SME/SME2 is supported and enabled for NS world, then enables SME
+	 * for Realm world. RMM will save/restore required registers that are
+	 * shared with SVE/FPU so that Realm can use FPU or SVE.
+	 */
+	if (is_feat_sme_supported()) {
+		/* sme_enable() also enables SME2 if supported by hardware */
+		sme_enable(ctx);
+	}
+}
+
+#if IMAGE_BL31
+static void manage_extensions_realm_per_world(void)
+{
 	if (is_feat_sve_supported()) {
 	/*
 	 * Enable SVE and FPU in realm context when it is enabled for NS.
 	 * Realm manager must ensure that the SVE and FPU register
 	 * contexts are properly managed.
 	 */
-		sve_enable(ctx);
+		sve_enable_per_world(&per_world_context[CPU_CONTEXT_REALM]);
 	}
 
 	/* NS can access this but Realm shouldn't */
 	if (is_feat_sys_reg_trace_supported()) {
-		sys_reg_trace_disable(ctx);
+		sys_reg_trace_disable_per_world(&per_world_context[CPU_CONTEXT_REALM]);
 	}
 
-	pmuv3_enable(ctx);
-
-	/*
-	 * If SME/SME2 is supported and enabled for NS world, then enables SME
-	 * for Realm world. RMM will save/restore required registers that are
-	 * shared with SVE/FPU so that Realm can use FPU or SVE.
-	 */
-	if (is_feat_sme_supported()) {
-		/* sme_enable() also enables SME2 if supported by hardware */
-		sme_enable(ctx);
-	}
+	/*
+	 * sme_enable() no longer touches CPTR_EL3, so set the SME enable
+	 * bit in the Realm per-world CPTR_EL3 value here.
+	 */
+	if (is_feat_sme_supported()) {
+		sme_enable_per_world(&per_world_context[CPU_CONTEXT_REALM]);
+	}
 }
+#endif /* IMAGE_BL31 */
 
 /*******************************************************************************
  * Jump to the RMM for the first time.
@@ -160,6 +175,8 @@
 	/* Enable architecture extensions */
 	manage_extensions_realm(&ctx->cpu_ctx);
 
+	manage_extensions_realm_per_world();
+
 	/* Initialize RMM EL2 context. */
 	rmm_el2_context_init(&ctx->cpu_ctx.el2_sysregs_ctx);