perf(cm): drop ZCR_EL3 saving and some ISBs and replace them with root context
SVE and SME aren't enabled symmetrically for all worlds, but EL3 needs
to context switch them nonetheless. Previously, this had to happen by
writing the enable bits just before reading/writing the relevant
context. But since the introduction of root context, this need not be
the case. We can have these enables always be present for EL3 and save
on some work (and ISBs!) on every context switch.
We can also hoist ZCR_EL3 to a never changing register, as we set its
value to be identical for every world, which happens to be the one we
want for EL3 too.
Change-Id: I3d950e72049a298008205ba32f230d5a5c02f8b0
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/docs/components/context-management-library.rst b/docs/components/context-management-library.rst
index 6a76ada..8cb1ace 100644
--- a/docs/components/context-management-library.rst
+++ b/docs/components/context-management-library.rst
@@ -498,7 +498,6 @@
typedef struct per_world_context {
uint64_t ctx_cptr_el3;
- uint64_t ctx_zcr_el3;
uint64_t ctx_mpam3_el3;
} per_world_context_t;
@@ -555,7 +554,7 @@
EL3 execution context needs to setup at both boot time (cold and warm boot)
entrypaths and at all the possible exception handlers routing to EL3 at runtime.
-*Copyright (c) 2024, Arm Limited and Contributors. All rights reserved.*
+*Copyright (c) 2024-2025, Arm Limited and Contributors. All rights reserved.*
.. |Context Memory Allocation| image:: ../resources/diagrams/context_memory_allocation.png
.. |CPU Context Memory Configuration| image:: ../resources/diagrams/cpu_data_config_context_memory.png
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 4864596..2f2aeaf 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -454,6 +454,10 @@
* Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not
* available.
*
+ * CPTR_EL3.EZ: Set to one so that accesses to ZCR_EL3 do not trap
+ * CPTR_EL3.TFP: Set to zero so that advanced SIMD operations don't trap
+ * CPTR_EL3.ESM: Set to one so that SME related registers don't trap
+ *
* PSTATE.DIT: Set to one to enable the Data Independent Timing (DIT)
* functionality, if implemented in EL3.
* ---------------------------------------------------------------------
@@ -473,6 +477,12 @@
orr x15, x15, #PMCR_EL0_DP_BIT
msr pmcr_el0, x15
+ mrs x15, cptr_el3
+ orr x15, x15, #CPTR_EZ_BIT
+ orr x15, x15, #ESM_BIT
+ bic x15, x15, #TFP_BIT
+ msr cptr_el3, x15
+
#if ENABLE_FEAT_DIT
#if ENABLE_FEAT_DIT > 1
mrs x15, id_aa64pfr0_el1
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 87f1541..15d5204 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -167,9 +167,8 @@
* Registers initialised in a per-world context.
******************************************************************************/
#define CTX_CPTR_EL3 U(0x0)
-#define CTX_ZCR_EL3 U(0x8)
-#define CTX_MPAM3_EL3 U(0x10)
-#define CTX_PERWORLD_EL3STATE_END U(0x18)
+#define CTX_MPAM3_EL3 U(0x8)
+#define CTX_PERWORLD_EL3STATE_END U(0x10)
#ifndef __ASSEMBLER__
@@ -278,7 +277,6 @@
*/
typedef struct per_world_context {
uint64_t ctx_cptr_el3;
- uint64_t ctx_zcr_el3;
uint64_t ctx_mpam3_el3;
} per_world_context_t;
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index 2979efb..a471efb 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -11,10 +11,14 @@
#if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
+void sve_init_el3(void);
void sve_init_el2_unused(void);
void sve_enable_per_world(per_world_context_t *per_world_ctx);
void sve_disable_per_world(per_world_context_t *per_world_ctx);
#else
+static inline void sve_init_el3(void)
+{
+}
static inline void sve_init_el2_unused(void)
{
}
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 6415cb3..1e6a42e 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -209,19 +209,6 @@
*/
func sve_context_save
.arch_extension sve
- /* Temporarily enable SVE */
- mrs x10, cptr_el3
- orr x11, x10, #CPTR_EZ_BIT
- bic x11, x11, #TFP_BIT
- msr cptr_el3, x11
- isb
-
- /* zcr_el3 */
- mrs x12, S3_6_C1_C2_0
- mov x13, #((SVE_VECTOR_LEN >> 7) - 1)
- msr S3_6_C1_C2_0, x13
- isb
-
/* Predicate registers */
mov x13, #CTX_SIMD_PREDICATES
add x9, x0, x13
@@ -237,11 +224,6 @@
mov x13, #CTX_SIMD_VECTORS
add x9, x0, x13
sve_vectors_op str, x9
-
- /* Restore SVE enablement */
- msr S3_6_C1_C2_0, x12 /* zcr_el3 */
- msr cptr_el3, x10
- isb
.arch_extension nosve
/* Save FPSR, FPCR and FPEXC32 */
@@ -260,19 +242,6 @@
*/
func sve_context_restore
.arch_extension sve
- /* Temporarily enable SVE for EL3 */
- mrs x10, cptr_el3
- orr x11, x10, #CPTR_EZ_BIT
- bic x11, x11, #TFP_BIT
- msr cptr_el3, x11
- isb
-
- /* zcr_el3 */
- mrs x12, S3_6_C1_C2_0
- mov x13, #((SVE_VECTOR_LEN >> 7) - 1)
- msr S3_6_C1_C2_0, x13
- isb
-
/* Restore FFR register before predicates */
mov x13, #CTX_SIMD_FFR
add x9, x0, x13
@@ -288,11 +257,6 @@
mov x13, #CTX_SIMD_VECTORS
add x9, x0, x13
sve_vectors_op ldr, x9
-
- /* Restore SVE enablement */
- msr S3_6_C1_C2_0, x12 /* zcr_el3 */
- msr cptr_el3, x10
- isb
.arch_extension nosve
/* Restore FPSR, FPCR and FPEXC32 */
@@ -604,10 +568,7 @@
/* ----------------------------------------------------------
* Restore CPTR_EL3.
- * ZCR is only restored if SVE is supported and enabled.
- * Synchronization is required before zcr_el3 is addressed.
- * ----------------------------------------------------------
- */
+ * ---------------------------------------------------------- */
/* The address of the per_world context is stored in x9 */
get_per_world_context x9
@@ -616,13 +577,6 @@
msr cptr_el3, x19
#if IMAGE_BL31
- ands x19, x19, #CPTR_EZ_BIT
- beq sve_not_enabled
-
- isb
- msr S3_6_C1_C2_0, x20 /* zcr_el3 */
-sve_not_enabled:
-
restore_mpam3_el3
#endif /* IMAGE_BL31 */
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 5bc9336..e28e1c3 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -666,6 +666,10 @@
#if IMAGE_BL31
void cm_manage_extensions_el3(unsigned int my_idx)
{
+ if (is_feat_sve_supported()) {
+ sve_init_el3();
+ }
+
if (is_feat_amu_supported()) {
amu_init_el3(my_idx);
}
diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c
index 98d57e9..e477da3 100644
--- a/lib/extensions/sme/sme.c
+++ b/lib/extensions/sme/sme.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2021-2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -40,13 +40,8 @@
void sme_init_el3(void)
{
- u_register_t cptr_el3 = read_cptr_el3();
u_register_t smcr_el3;
- /* Set CPTR_EL3.ESM bit so we can access SMCR_EL3 without trapping. */
- write_cptr_el3(cptr_el3 | ESM_BIT);
- isb();
-
/*
* Set the max LEN value and FA64 bit. This register is set up per_world
* to be the least restrictive, then lower ELs can restrict as needed
@@ -69,10 +64,6 @@
smcr_el3 |= SMCR_ELX_EZT0_BIT;
}
write_smcr_el3(smcr_el3);
-
- /* Reset CPTR_EL3 value. */
- write_cptr_el3(cptr_el3);
- isb();
}
void sme_init_el2_unused(void)
diff --git a/lib/extensions/sve/sve.c b/lib/extensions/sve/sve.c
index 143717e..4e18cdf 100644
--- a/lib/extensions/sve/sve.c
+++ b/lib/extensions/sve/sve.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -22,6 +22,12 @@
*/
#define CONVERT_SVE_LENGTH(x) (((x / 128) - 1))
+void sve_init_el3(void)
+{
+ /* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
+ write_zcr_el3(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
+}
+
void sve_enable_per_world(per_world_context_t *per_world_ctx)
{
u_register_t cptr_el3;
@@ -30,9 +36,6 @@
cptr_el3 = per_world_ctx->ctx_cptr_el3;
cptr_el3 = (cptr_el3 | CPTR_EZ_BIT) & ~(TFP_BIT);
per_world_ctx->ctx_cptr_el3 = cptr_el3;
-
- /* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
- per_world_ctx->ctx_zcr_el3 = (ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
}
void sve_init_el2_unused(void)