perf(cm): drop ZCR_EL3 saving and some ISBs and replace them with root context

SVE and SME aren't enabled symmetrically for all worlds, but EL3 needs
to context switch them nonetheless. Previously, this was done by
writing the enable bits just before reading/writing the relevant
context. Since the introduction of root context, this is no longer
necessary: the enable bits can stay set for as long as we are in EL3,
saving some work (and ISBs!) on every context switch.

We can also hoist ZCR_EL3 out of the per-world context and treat it as
a register that never changes: it is set to the same value for every
world, which happens to be the one we want for EL3 too.

Change-Id: I3d950e72049a298008205ba32f230d5a5c02f8b0
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 4864596..2f2aeaf 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -454,6 +454,10 @@
 	 *  Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not
 	 *  available.
 	 *
+	 * CPTR_EL3.EZ: Set to one so that accesses to ZCR_EL3 do not trap
+	 * CPTR_EL3.TFP: Set to zero so that advanced SIMD operations don't trap
+	 * CPTR_EL3.ESM: Set to one so that SME related registers don't trap
+	 *
 	 * PSTATE.DIT: Set to one to enable the Data Independent Timing (DIT)
 	 *  functionality, if implemented in EL3.
 	 * ---------------------------------------------------------------------
@@ -473,6 +477,12 @@
 		orr	x15, x15, #PMCR_EL0_DP_BIT
 		msr	pmcr_el0, x15
 
+		mrs	x15, cptr_el3
+		orr	x15, x15, #CPTR_EZ_BIT
+		orr	x15, x15, #ESM_BIT
+		bic	x15, x15, #TFP_BIT
+		msr	cptr_el3, x15
+
 #if ENABLE_FEAT_DIT
 #if ENABLE_FEAT_DIT > 1
 		mrs	x15, id_aa64pfr0_el1
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 87f1541..15d5204 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -167,9 +167,8 @@
  * Registers initialised in a per-world context.
  ******************************************************************************/
 #define CTX_CPTR_EL3			U(0x0)
-#define CTX_ZCR_EL3			U(0x8)
-#define CTX_MPAM3_EL3			U(0x10)
-#define CTX_PERWORLD_EL3STATE_END	U(0x18)
+#define CTX_MPAM3_EL3			U(0x8)
+#define CTX_PERWORLD_EL3STATE_END	U(0x10)
 
 #ifndef __ASSEMBLER__
 
@@ -278,7 +277,6 @@
  */
 typedef struct per_world_context {
 	uint64_t ctx_cptr_el3;
-	uint64_t ctx_zcr_el3;
 	uint64_t ctx_mpam3_el3;
 } per_world_context_t;
 
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index 2979efb..a471efb 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -11,10 +11,14 @@
 
 #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
 
+void sve_init_el3(void);
 void sve_init_el2_unused(void);
 void sve_enable_per_world(per_world_context_t *per_world_ctx);
 void sve_disable_per_world(per_world_context_t *per_world_ctx);
 #else
+static inline void sve_init_el3(void)
+{
+}
 static inline void sve_init_el2_unused(void)
 {
 }