feat(psci): remove cpu context init by index

Currently, the calling core (meaning the core that handled the CPU_ON
call, or the core itself on the powerdown path of CPU_SUSPEND) is in
charge of initialising the context for the waking core (which resumes
at the warmboot entrypoint in both cases). This is convenient because
the calling core can write the context while in coherency, and the
waking core will only need the context after it has entered coherency.
This avoids any cache maintenance and keeps the communication simple.

However, this has three main problems:
a) asymmetric feature support is problematic - the calling core has no
   way of knowing the feature set of the waking core. If the two
   diverge, architectural feature discovery via the ID registers breaks
   down. We have so far "fixed" this on a case-by-case basis, which
   doesn't scale and introduces redundancy.

b) powerdown abandon (pabandon) introduces a contradiction - the calling
   core has to initialise the context for when the core wakes up, but
   should the core not power down, it needs its old context intact. The
   only way to work around this is to keep two copies of the context,
   which incurs a runtime and memory overhead.

c) cm_prepare_el3_exit[_ns]() doesn't have access to the entrypoint but
   needs it to make initialisation decisions. We can infer some of this
   from registers that have already been written, but that is awkwardly
   limiting. It also forces this code to remain split from the context
   initialisation.

We can solve all three by making each core fully own its own context.
The calling core then writes the entrypoint information and nothing
else. The waking core initialises its own context as it sees fit, with
full knowledge of the whole picture.

The only tricky bit is cache coherency - the waking core has to be able
to coherently observe its new entrypoint. Calling cores write to the
shared region with coherent caches on. If the waking core reads the
entrypoint only after it has entered coherency, we can avoid explicit
cache maintenance and let the hardware handle everything.

We can drop the SPSR check for FEAT_TCR2 as it makes no difference. We
also no longer need to enable it twice from generic code.

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Signed-off-by: Manish Pandey <manish.pandey2@arm.com>
Change-Id: I86e7fe8b698191fc3b469e5ced1fd010f8754b0e
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index 132888c..00d9c01 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -155,21 +155,6 @@
 #endif /*  IMAGE_BL32 */
 }
 
-#if !IMAGE_BL1
-/*******************************************************************************
- * The following function initializes the cpu_context for a CPU specified by
- * its `cpu_idx` for first use, and sets the initial entrypoint state as
- * specified by the entry_point_info structure.
- ******************************************************************************/
-void cm_init_context_by_index(unsigned int cpu_idx,
-			      const entry_point_info_t *ep)
-{
-	cpu_context_t *ctx;
-	ctx = cm_get_context_by_index(cpu_idx, GET_SECURITY_STATE(ep->h.attr));
-	cm_setup_context(ctx, ep);
-}
-#endif /* !IMAGE_BL1 */
-
 /*******************************************************************************
  * The following function initializes the cpu_context for the current CPU
  * for first use, and sets the initial entrypoint state as specified by the
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 021d538..2d1b063 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -484,13 +484,6 @@
 #endif /* CTX_INCLUDE_PAUTH_REGS */
 
 	/*
-	 * SCR_EL3.TCR2EN: Enable access to TCR2_ELx for AArch64 if present.
-	 */
-	if (is_feat_tcr2_supported() && (GET_RW(ep->spsr) == MODE_RW_64)) {
-		scr_el3 |= SCR_TCR2EN_BIT;
-	}
-
-	/*
 	 * SCR_EL3.PIEN: Enable permission indirection and overlay
 	 * registers for AArch64 if present.
 	 */
@@ -591,6 +584,10 @@
 		trf_enable(ctx);
 	}
 
+	if (is_feat_tcr2_supported()) {
+		tcr2_enable(ctx);
+	}
+
 	pmuv3_enable(ctx);
 #endif /* IMAGE_BL31 */
 
@@ -839,8 +836,10 @@
 		spe_enable(ctx);
 	}
 
-	if (is_feat_trbe_supported()) {
-		trbe_enable(ctx);
+	if (!check_if_trbe_disable_affected_core()) {
+		if (is_feat_trbe_supported()) {
+			trbe_enable(ctx);
+		}
 	}
 
 	if (is_feat_brbe_supported()) {
@@ -954,21 +953,6 @@
 	}
 #endif /* IMAGE_BL31 */
 }
-
-#if !IMAGE_BL1
-/*******************************************************************************
- * The following function initializes the cpu_context for a CPU specified by
- * its `cpu_idx` for first use, and sets the initial entrypoint state as
- * specified by the entry_point_info structure.
- ******************************************************************************/
-void cm_init_context_by_index(unsigned int cpu_idx,
-			      const entry_point_info_t *ep)
-{
-	cpu_context_t *ctx;
-	ctx = cm_get_context_by_index(cpu_idx, GET_SECURITY_STATE(ep->h.attr));
-	cm_setup_context(ctx, ep);
-}
-#endif /* !IMAGE_BL1 */
 
 /*******************************************************************************
  * The following function initializes the cpu_context for the current CPU
@@ -1647,52 +1631,6 @@
 }
 #endif /* (CTX_INCLUDE_EL2_REGS && IMAGE_BL31) */
 
-#if IMAGE_BL31
-/*********************************************************************************
-* This function allows Architecture features asymmetry among cores.
-* TF-A assumes that all the cores in the platform has architecture feature parity
-* and hence the context is setup on different core (e.g. primary sets up the
-* context for secondary cores).This assumption may not be true for systems where
-* cores are not conforming to same Arch version or there is CPU Erratum which
-* requires certain feature to be be disabled only on a given core.
-*
-* This function is called on secondary cores to override any disparity in context
-* setup by primary, this would be called during warmboot path.
-*********************************************************************************/
-void cm_handle_asymmetric_features(void)
-{
-	cpu_context_t *ctx __maybe_unused = cm_get_context(NON_SECURE);
-
-	assert(ctx != NULL);
-
-#if ENABLE_SPE_FOR_NS == FEAT_STATE_CHECK_ASYMMETRIC
-	if (is_feat_spe_supported()) {
-		spe_enable(ctx);
-	} else {
-		spe_disable(ctx);
-	}
-#endif
-
-	if (check_if_trbe_disable_affected_core()) {
-		if (is_feat_trbe_supported()) {
-			trbe_disable(ctx);
-		}
-	}
-
-#if ENABLE_FEAT_TCR2 == FEAT_STATE_CHECK_ASYMMETRIC
-	el3_state_t *el3_state = get_el3state_ctx(ctx);
-	u_register_t spsr = read_ctx_reg(el3_state, CTX_SPSR_EL3);
-
-	if (is_feat_tcr2_supported() && (GET_RW(spsr) == MODE_RW_64)) {
-		tcr2_enable(ctx);
-	} else {
-		tcr2_disable(ctx);
-	}
-#endif
-
-}
-#endif
-
 /*******************************************************************************
  * This function is used to exit to Non-secure world. If CTX_INCLUDE_EL2_REGS
  * is enabled, it restores EL1 and EL2 sysreg contexts instead of directly
@@ -1701,18 +1639,6 @@
  ******************************************************************************/
 void cm_prepare_el3_exit_ns(void)
 {
-#if IMAGE_BL31
-	/*
-	 * Check and handle Architecture feature asymmetry among cores.
-	 *
-	 * In warmboot path secondary cores context is initialized on core which
-	 * did CPU_ON SMC call, if there is feature asymmetry in these cores handle
-	 * it in this function call.
-	 * For Symmetric cores this is an empty function.
-	 */
-	cm_handle_asymmetric_features();
-#endif
-
 #if (CTX_INCLUDE_EL2_REGS && IMAGE_BL31)
 #if ENABLE_ASSERTIONS
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 17ecab8..1c634e3 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -1032,6 +1032,13 @@
 	}
 
 	/*
+	 * Caches and (importantly) coherency are on so we can rely on seeing
+	 * whatever the primary gave us without explicit cache maintenance
+	 */
+	entry_point_info_t *ep = get_cpu_data(warmboot_ep_info);
+	cm_init_my_context(ep);
+
+	/*
 	 * Generic management: Now we just need to retrieve the
 	 * information that we had stashed away during the cpu_on
 	 * call to set this cpu on its way.
diff --git a/lib/psci/psci_main.c b/lib/psci/psci_main.c
index 34de10f..dc902b6 100644
--- a/lib/psci/psci_main.c
+++ b/lib/psci/psci_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -28,15 +28,17 @@
 
 {
 	int rc;
-	entry_point_info_t ep;
+	entry_point_info_t *ep;
+	unsigned int target_idx = (unsigned int)plat_core_pos_by_mpidr(target_cpu);
 
 	/* Validate the target CPU */
 	if (!is_valid_mpidr(target_cpu)) {
 		return PSCI_E_INVALID_PARAMS;
 	}
 
-	/* Validate the entry point and get the entry_point_info */
-	rc = psci_validate_entry_point(&ep, entrypoint, context_id);
+	ep = get_cpu_data_by_index(target_idx, warmboot_ep_info);
+	/* Validate the lower EL entry point and put it in the entry_point_info */
+	rc = psci_validate_entry_point(ep, entrypoint, context_id);
 	if (rc != PSCI_E_SUCCESS) {
 		return rc;
 	}
@@ -45,7 +47,7 @@
 	 * To turn this cpu on, specify which power
 	 * levels need to be turned on
 	 */
-	return psci_cpu_on_start(target_cpu, &ep);
+	return psci_cpu_on_start(target_cpu, ep);
 }
 
 unsigned int psci_version(void)
@@ -59,7 +61,6 @@
 {
 	int rc;
 	unsigned int target_pwrlvl, is_power_down_state;
-	entry_point_info_t ep;
 	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
 	plat_local_state_t cpu_pd_state;
 	unsigned int cpu_idx = plat_my_core_pos();
@@ -173,7 +174,9 @@
 	 * point and program entry information.
 	 */
 	if (is_power_down_state != 0U) {
-		rc = psci_validate_entry_point(&ep, entrypoint, context_id);
+		entry_point_info_t *ep = get_cpu_data_by_index(cpu_idx, warmboot_ep_info);
+
+		rc = psci_validate_entry_point(ep, entrypoint, context_id);
 		if (rc != PSCI_E_SUCCESS) {
 			return rc;
 		}
@@ -186,7 +189,6 @@
 	 * arrival of an interrupt
 	 */
 	rc = psci_cpu_suspend_start(cpu_idx,
-				    &ep,
 				    target_pwrlvl,
 				    &state_info,
 				    is_power_down_state);
@@ -199,8 +201,8 @@
 {
 	int rc;
 	psci_power_state_t state_info;
-	entry_point_info_t ep;
 	unsigned int cpu_idx = plat_my_core_pos();
+	entry_point_info_t *ep = get_cpu_data_by_index(cpu_idx, warmboot_ep_info);
 
 	/* Check if the current CPU is the last ON CPU in the system */
 	if (!psci_is_last_on_cpu(cpu_idx)) {
@@ -208,7 +210,7 @@
 	}
 
 	/* Validate the entry point and get the entry_point_info */
-	rc = psci_validate_entry_point(&ep, entrypoint, context_id);
+	rc = psci_validate_entry_point(ep, entrypoint, context_id);
 	if (rc != PSCI_E_SUCCESS) {
 		return rc;
 	}
@@ -235,7 +237,6 @@
 	 * arrival of an interrupt
 	 */
 	rc = psci_cpu_suspend_start(cpu_idx,
-				    &ep,
 				    PLAT_MAX_PWR_LVL,
 				    &state_info,
 				    PSTATE_TYPE_POWERDOWN);
diff --git a/lib/psci/psci_on.c b/lib/psci/psci_on.c
index 3eacb9c..1bbea7a 100644
--- a/lib/psci/psci_on.c
+++ b/lib/psci/psci_on.c
@@ -141,10 +141,7 @@
 	rc = psci_plat_pm_ops->pwr_domain_on(target_cpu);
 	assert((rc == PSCI_E_SUCCESS) || (rc == PSCI_E_INTERN_FAIL));
 
-	if (rc == PSCI_E_SUCCESS) {
-		/* Store the re-entry information for the non-secure world. */
-		cm_init_context_by_index(target_idx, ep);
-	} else {
+	if (rc != PSCI_E_SUCCESS) {
 		/* Restore the state on error. */
 		psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_OFF);
 		flush_cpu_data_by_index(target_idx,
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index 49b19c9..f3f5a5c 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -344,7 +344,6 @@
 
 /* Private exported functions from psci_suspend.c */
 int psci_cpu_suspend_start(unsigned int idx,
-			   const entry_point_info_t *ep,
 			   unsigned int end_pwrlvl,
 			   psci_power_state_t *state_info,
 			   unsigned int is_power_down_state);
diff --git a/lib/psci/psci_setup.c b/lib/psci/psci_setup.c
index 3604549..e1b0ae8 100644
--- a/lib/psci/psci_setup.c
+++ b/lib/psci/psci_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -30,6 +30,7 @@
  * of relying on platform defined constants.
  ******************************************************************************/
 static cpu_context_t psci_ns_context[PLATFORM_CORE_COUNT];
+static entry_point_info_t warmboot_ep_info[PLATFORM_CORE_COUNT];
 
 /******************************************************************************
  * Define the psci capability variable.
@@ -114,6 +115,13 @@
 	}
 }
 
+static void __init populate_cpu_data(void)
+{
+	for (unsigned int idx = 0; idx < psci_plat_core_count; idx++) {
+		set_cpu_data_by_index(idx, warmboot_ep_info, &warmboot_ep_info[idx]);
+	}
+}
+
 /*******************************************************************************
  * Core routine to populate the power domain tree. The tree descriptor passed by
  * the platform is populated breadth-first and the first entry in the map
@@ -218,6 +226,9 @@
 	/* Update the CPU limits for each node in psci_non_cpu_pd_nodes */
 	psci_update_pwrlvl_limits();
 
+	/* Initialise the warmboot entrypoints */
+	populate_cpu_data();
+
 	/* Populate the mpidr field of cpu node for this CPU */
 	psci_cpu_pd_nodes[cpu_idx].mpidr =
 		read_mpidr() & MPIDR_AFFINITY_MASK;
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index f690e49..39d4482 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -45,7 +45,6 @@
 static void psci_suspend_to_pwrdown_start(unsigned int idx,
 					  unsigned int end_pwrlvl,
 					  unsigned int max_off_lvl,
-					  const entry_point_info_t *ep,
 					  const psci_power_state_t *state_info)
 {
 	PUBLISH_EVENT_ARG(psci_suspend_pwrdown_start, &idx);
@@ -85,12 +84,6 @@
 	if (psci_plat_pm_ops->pwr_domain_suspend_pwrdown_early != NULL)
 		psci_plat_pm_ops->pwr_domain_suspend_pwrdown_early(state_info);
 #endif
-
-	/*
-	 * Store the re-entry information for the non-secure world.
-	 */
-	cm_init_my_context(ep);
-
 	/*
 	 * Arch. management. Initiate power down sequence.
 	 */
@@ -116,7 +109,6 @@
  * not possible to undo any of the actions taken beyond that point.
  ******************************************************************************/
 int psci_cpu_suspend_start(unsigned int idx,
-			   const entry_point_info_t *ep,
 			   unsigned int end_pwrlvl,
 			   psci_power_state_t *state_info,
 			   unsigned int is_power_down_state)
@@ -124,10 +116,6 @@
 	int rc = PSCI_E_SUCCESS;
 	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 	unsigned int max_off_lvl = 0;
-#if FEAT_PABANDON
-	cpu_context_t *ctx = cm_get_context(NON_SECURE);
-	cpu_context_t old_ctx;
-#endif
 
 	/*
 	 * This function must only be called on platforms where the
@@ -205,26 +193,9 @@
 #if !CTX_INCLUDE_EL2_REGS
 		cm_el1_sysregs_context_save(NON_SECURE);
 #endif
-		/*
-		 * when the core wakes it expects its context to already be in
-		 * place so we must overwrite it before powerdown. But if
-		 * powerdown never happens we want the old context. Save it in
-		 * case we wake up. EL2/El1 will not be touched by PSCI so don't
-		 * copy */
-		memcpy(&old_ctx.gpregs_ctx, &ctx->gpregs_ctx, sizeof(gp_regs_t));
-		memcpy(&old_ctx.el3state_ctx, &ctx->el3state_ctx, sizeof(el3_state_t));
-#if DYNAMIC_WORKAROUND_CVE_2018_3639
-		memcpy(&old_ctx.cve_2018_3639_ctx, &ctx->cve_2018_3639_ctx, sizeof(cve_2018_3639_t));
-#endif
-#if ERRATA_SPECULATIVE_AT
-		memcpy(&old_ctx.errata_speculative_at_ctx, &ctx->errata_speculative_at_ctx, sizeof(errata_speculative_at_t));
-#endif
-#if CTX_INCLUDE_PAUTH_REGS
-		memcpy(&old_ctx.pauth_ctx, &ctx->pauth_ctx, sizeof(pauth_t));
-#endif
 #endif
 		max_off_lvl = psci_find_max_off_lvl(state_info);
-		psci_suspend_to_pwrdown_start(idx, end_pwrlvl, max_off_lvl, ep, state_info);
+		psci_suspend_to_pwrdown_start(idx, end_pwrlvl, max_off_lvl, state_info);
 	}
 
 	/*
@@ -301,18 +272,6 @@
 #if FEAT_PABANDON
 		psci_cpu_suspend_to_powerdown_finish(idx, max_off_lvl, state_info);
 
-		/* we overwrote context ourselves, put it back */
-		memcpy(&ctx->gpregs_ctx, &old_ctx.gpregs_ctx, sizeof(gp_regs_t));
-		memcpy(&ctx->el3state_ctx, &old_ctx.el3state_ctx, sizeof(el3_state_t));
-#if DYNAMIC_WORKAROUND_CVE_2018_3639
-		memcpy(&ctx->cve_2018_3639_ctx, &old_ctx.cve_2018_3639_ctx, sizeof(cve_2018_3639_t));
-#endif
-#if ERRATA_SPECULATIVE_AT
-		memcpy(&ctx->errata_speculative_at_ctx, &old_ctx.errata_speculative_at_ctx, sizeof(errata_speculative_at_t));
-#endif
-#if CTX_INCLUDE_PAUTH_REGS
-		memcpy(&ctx->pauth_ctx, &old_ctx.pauth_ctx, sizeof(pauth_t));
-#endif
 #if !CTX_INCLUDE_EL2_REGS
 		cm_el1_sysregs_context_restore(NON_SECURE);
 #endif