perf(cm): drop ZCR_EL3 saving and some ISBs and replace them with root context

SVE and SME aren't enabled symmetrically for all worlds, but EL3 needs
to context switch them nonetheless. Previously, this was done by
writing the enable bits just before reading/writing the relevant
context. Since the introduction of root context, this is no longer
necessary: the enables can stay permanently set for EL3, which saves
some work (and ISBs!) on every context switch.

We can also hoist ZCR_EL3 to a never-changing register, as we set its
value to be identical for every world, and that value happens to be the
one we want for EL3 too.

Change-Id: I3d950e72049a298008205ba32f230d5a5c02f8b0
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 4864596..2f2aeaf 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -454,6 +454,10 @@
 	 *  Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not
 	 *  available.
 	 *
+	 * CPTR_EL3.EZ: Set to one so that accesses to ZCR_EL3 do not trap
+	 * CPTR_EL3.TFP: Set to zero so that Advanced SIMD operations don't trap
+	 * CPTR_EL3.ESM: Set to one so that SME related registers don't trap
+	 *
 	 * PSTATE.DIT: Set to one to enable the Data Independent Timing (DIT)
 	 *  functionality, if implemented in EL3.
 	 * ---------------------------------------------------------------------
@@ -473,6 +477,12 @@
 		orr	x15, x15, #PMCR_EL0_DP_BIT
 		msr	pmcr_el0, x15
 
+		mrs	x15, cptr_el3
+		orr	x15, x15, #CPTR_EZ_BIT
+		orr	x15, x15, #ESM_BIT
+		bic	x15, x15, #TFP_BIT
+		msr	cptr_el3, x15
+
 #if ENABLE_FEAT_DIT
 #if ENABLE_FEAT_DIT > 1
 		mrs	x15, id_aa64pfr0_el1