refactor(cm): move EL3 registers to global context

Currently, EL3 context registers are duplicated per-world per-cpu.
Some registers have the same value across all CPUs, so this patch
moves these registers out into a per-world context to reduce
memory usage.

Change-Id: I91294e3d5f4af21a58c23599af2bdbd2a747c54a
Signed-off-by: Elizabeth Ho <elizabeth.ho@arm.com>
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index e7e9f58..470d113 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2022, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,7 @@
 #ifndef CONTEXT_H
 #define CONTEXT_H
 
+#include <lib/el3_runtime/cpu_data.h>
 #include <lib/utils_def.h>
 
 /*******************************************************************************
@@ -61,10 +62,8 @@
 #define CTX_ELR_EL3		U(0x20)
 #define CTX_PMCR_EL0		U(0x28)
 #define CTX_IS_IN_EL3		U(0x30)
-#define CTX_CPTR_EL3		U(0x38)
-#define CTX_ZCR_EL3		U(0x40)
-#define CTX_MPAM3_EL3		U(0x48)
-#define CTX_EL3STATE_END	U(0x50) /* Align to the next 16 byte boundary */
+#define CTX_MPAM3_EL3		U(0x38)
+#define CTX_EL3STATE_END	U(0x40) /* Align to the next 16 byte boundary */
 
 /*******************************************************************************
  * Constants that allow assembler code to access members of and the
@@ -324,6 +323,13 @@
 #define CTX_PAUTH_REGS_END	U(0)
 #endif /* CTX_INCLUDE_PAUTH_REGS */
 
+/*******************************************************************************
+ * Registers initialised in a per-world context.
+ ******************************************************************************/
+#define CTX_CPTR_EL3		U(0x0)
+#define CTX_ZCR_EL3		U(0x8)
+#define CTX_GLOBAL_EL3STATE_END	U(0x10)
+
 #ifndef __ASSEMBLER__
 
 #include <stdint.h>
@@ -434,6 +440,17 @@
 #endif
 } cpu_context_t;
 
+/*
+ * Per-World Context.
+ * It stores registers whose values can be shared across CPUs.
+ */
+typedef struct per_world_context {
+	uint64_t ctx_cptr_el3;
+	uint64_t ctx_zcr_el3;
+} per_world_context_t;
+
+extern per_world_context_t per_world_context[CPU_DATA_CONTEXT_NUM];
+
 /* Macros to access members of the 'cpu_context_t' structure */
 #define get_el3state_ctx(h)	(&((cpu_context_t *) h)->el3state_ctx)
 #if CTX_INCLUDE_FPREGS
diff --git a/include/lib/el3_runtime/context_mgmt.h b/include/lib/el3_runtime/context_mgmt.h
index aa76f3b..b2bdaf5 100644
--- a/include/lib/el3_runtime/context_mgmt.h
+++ b/include/lib/el3_runtime/context_mgmt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2022, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -39,6 +39,7 @@
 #ifdef __aarch64__
 #if IMAGE_BL31
 void cm_manage_extensions_el3(void);
+void manage_extensions_nonsecure_per_world(void);
 #endif
 #if CTX_INCLUDE_EL2_REGS
 void cm_el2_sysregs_context_save(uint32_t security_state);
@@ -88,6 +89,7 @@
 void *cm_get_next_context(void);
 void cm_set_next_context(void *context);
 static inline void cm_manage_extensions_el3(void) {}
+static inline void manage_extensions_nonsecure_per_world(void) {}
 #endif /* __aarch64__ */
 
 #endif /* CONTEXT_MGMT_H */
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index 09d8dee..a396b99 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -19,9 +19,11 @@
 void amu_enable(cpu_context_t *ctx);
 void amu_init_el3(void);
 void amu_init_el2_unused(void);
+void amu_enable_per_world(per_world_context_t *per_world_ctx);
 #else
 void amu_enable(bool el2_unused);
-#endif
+#endif /* __aarch64__ */
+
 #else
 #if __aarch64__
 void amu_enable(cpu_context_t *ctx)
@@ -33,12 +35,15 @@
 void amu_init_el2_unused(void)
 {
 }
+void amu_enable_per_world(per_world_context_t *per_world_ctx)
+{
+}
 #else
 static inline void amu_enable(bool el2_unused)
 {
 }
-#endif
-#endif
+#endif /*__aarch64__ */
+#endif /* ENABLE_FEAT_AMU */
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 /*
diff --git a/include/lib/extensions/sme.h b/include/lib/extensions/sme.h
index dbefdfc..bd7948e 100644
--- a/include/lib/extensions/sme.h
+++ b/include/lib/extensions/sme.h
@@ -21,23 +21,31 @@
 #define SME_SMCR_LEN_MAX	U(0x1FF)
 
 #if ENABLE_SME_FOR_NS
-void sme_enable(cpu_context_t *context);
 void sme_init_el3(void);
 void sme_init_el2_unused(void);
+void sme_enable(cpu_context_t *context);
 void sme_disable(cpu_context_t *context);
+void sme_enable_per_world(per_world_context_t *per_world_ctx);
+void sme_disable_per_world(per_world_context_t *per_world_ctx);
 #else
-static inline void sme_enable(cpu_context_t *context)
-{
-}
 static inline void sme_init_el3(void)
 {
 }
 static inline void sme_init_el2_unused(void)
 {
 }
+static inline void sme_enable(cpu_context_t *context)
+{
+}
 static inline void sme_disable(cpu_context_t *context)
 {
 }
+static inline void sme_enable_per_world(per_world_context_t *per_world_ctx)
+{
+}
+static inline void sme_disable_per_world(per_world_context_t *per_world_ctx)
+{
+}
 #endif /* ENABLE_SME_FOR_NS */
 
 #endif /* SME_H */
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index fc76a16..947c905 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -10,17 +10,17 @@
 #include <context.h>
 
 #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
-void sve_enable(cpu_context_t *context);
 void sve_init_el2_unused(void);
-void sve_disable(cpu_context_t *context);
+void sve_enable_per_world(per_world_context_t *per_world_ctx);
+void sve_disable_per_world(per_world_context_t *per_world_ctx);
 #else
-static inline void sve_enable(cpu_context_t *context)
+static inline void sve_init_el2_unused(void)
 {
 }
-static inline void sve_init_el2_unused(void)
+static inline void sve_enable_per_world(per_world_context_t *per_world_ctx)
 {
 }
-static inline void sve_disable(cpu_context_t *context)
+static inline void sve_disable_per_world(per_world_context_t *per_world_ctx)
 {
 }
 #endif /* ( ENABLE_SME_FOR_NS | ENABLE_SVE_FOR_NS ) */
diff --git a/include/lib/extensions/sys_reg_trace.h b/include/lib/extensions/sys_reg_trace.h
index beda88a..7004267 100644
--- a/include/lib/extensions/sys_reg_trace.h
+++ b/include/lib/extensions/sys_reg_trace.h
@@ -12,8 +12,8 @@
 #if ENABLE_SYS_REG_TRACE_FOR_NS
 
 #if __aarch64__
-void sys_reg_trace_enable(cpu_context_t *context);
-void sys_reg_trace_disable(cpu_context_t *context);
+void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx);
+void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx);
 void sys_reg_trace_init_el2_unused(void);
 #else
 void sys_reg_trace_init_el3(void);
@@ -22,10 +22,10 @@
 #else /* !ENABLE_SYS_REG_TRACE_FOR_NS */
 
 #if __aarch64__
-static inline void sys_reg_trace_enable(cpu_context_t *context)
+static inline void sys_reg_trace_enable_per_world(per_world_context_t *per_world_ctx)
 {
 }
-static inline void sys_reg_trace_disable(cpu_context_t *context)
+static inline void sys_reg_trace_disable_per_world(per_world_context_t *per_world_ctx)
 {
 }
 static inline void sys_reg_trace_init_el2_unused(void)