perf(bl31): convert cpu_data fetching to C

The assembly routines are opaque to the compiler, so it cannot inline
them. There is also no requirement for them to be callable without a
stack: every call site has a stack available. Convert them to C so
that the compiler is free to inline them.

On AArch32, _cpu_data must remain callable from the entrypoint, so as a
slight exception it stays an out-of-line function, not a static inline.

We can also straighten out the type of the cpu_ops_ptr member so we
don't have to cast it everywhere.

Change-Id: I9c2939a955b396edf26b99ef36318eebeaab13e6
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index fce0f2c..ee5d8d9 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -65,7 +65,11 @@
 	 * ---------------------------------------------------------------------
 	 */
 	bl	plat_my_core_pos
-	bl	_cpu_data_by_index
+	/* x0 = &percpu_data[core_pos], i.e. base + core_pos * CPU_DATA_SIZE */
+	mov_imm	x1, CPU_DATA_SIZE
+	mul	x0, x0, x1
+	adr_l	x1, percpu_data
+	add	x0, x0, x1
 	msr	tpidr_el3, x0
 #endif /* IMAGE_BL31 */
 
diff --git a/include/lib/el3_runtime/cpu_data.h b/include/lib/el3_runtime/cpu_data.h
index 3dc156a..20a6c39 100644
--- a/include/lib/el3_runtime/cpu_data.h
+++ b/include/lib/el3_runtime/cpu_data.h
@@ -138,7 +138,7 @@
 	void *cpu_context[CPU_DATA_CONTEXT_NUM];
 #endif /* __aarch64__ */
 	entry_point_info_t *warmboot_ep_info;
-	uintptr_t cpu_ops_ptr;
+	struct cpu_ops *cpu_ops_ptr;
 	struct psci_cpu_data psci_svc_cpu_data;
 #if ENABLE_PAUTH
 	uint64_t apiakey[2];
@@ -196,16 +196,19 @@
 		assert_cpu_data_pmf_ts0_offset_mismatch);
 #endif
 
-struct cpu_data *_cpu_data_by_index(uint32_t cpu_index);
+static inline cpu_data_t *_cpu_data_by_index(unsigned int cpu_index)
+{
+	return &percpu_data[cpu_index];
+}
 
 #ifdef __aarch64__
 /* Return the cpu_data structure for the current CPU. */
-static inline struct cpu_data *_cpu_data(void)
+static inline cpu_data_t *_cpu_data(void)
 {
 	return (cpu_data_t *)read_tpidr_el3();
 }
 #else
-struct cpu_data *_cpu_data(void);
+cpu_data_t *_cpu_data(void);
 #endif
 
 /*
diff --git a/lib/el3_runtime/aarch32/cpu_data.S b/lib/el3_runtime/aarch32/cpu_data.S
deleted file mode 100644
index e59b7fd..0000000
--- a/lib/el3_runtime/aarch32/cpu_data.S
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016, Arm Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-#include <lib/el3_runtime/cpu_data.h>
-
-	.globl	_cpu_data
-	.globl	_cpu_data_by_index
-
-/* -----------------------------------------------------------------
- * cpu_data_t *_cpu_data(void)
- *
- * Return the cpu_data structure for the current CPU.
- * -----------------------------------------------------------------
- */
-func _cpu_data
-	/* r12 is pushed to meet the 8 byte stack alignment requirement */
-	push	{r12, lr}
-	bl	plat_my_core_pos
-	pop	{r12, lr}
-	b	_cpu_data_by_index
-endfunc _cpu_data
-
-/* -----------------------------------------------------------------
- * cpu_data_t *_cpu_data_by_index(uint32_t cpu_index)
- *
- * Return the cpu_data structure for the CPU with given linear index
- *
- * This can be called without a valid stack.
- * clobbers: r0, r1
- * -----------------------------------------------------------------
- */
-func _cpu_data_by_index
-	mov_imm	r1, CPU_DATA_SIZE
-	mul	r0, r0, r1
-	ldr	r1, =percpu_data
-	add	r0, r0, r1
-	bx	lr
-endfunc _cpu_data_by_index
diff --git a/lib/el3_runtime/aarch64/cpu_data.S b/lib/el3_runtime/aarch64/cpu_data.S
deleted file mode 100644
index 02d9415..0000000
--- a/lib/el3_runtime/aarch64/cpu_data.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2014-2025, Arm Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <asm_macros.S>
-#include <lib/el3_runtime/cpu_data.h>
-
-.globl	_cpu_data_by_index
-
-/* -----------------------------------------------------------------
- * cpu_data_t *_cpu_data_by_index(uint32_t cpu_index)
- *
- * Return the cpu_data structure for the CPU with given linear index
- *
- * This can be called without a valid stack.
- * clobbers: x0, x1
- * -----------------------------------------------------------------
- */
-func _cpu_data_by_index
-	mov_imm	x1, CPU_DATA_SIZE
-	mul	x0, x0, x1
-	adrp	x1, percpu_data
-	add	x1, x1, :lo12:percpu_data
-	add	x0, x0, x1
-	ret
-endfunc _cpu_data_by_index
diff --git a/lib/el3_runtime/cpu_data_array.c b/lib/el3_runtime/cpu_data_array.c
index 2056182..f2e97f0 100644
--- a/lib/el3_runtime/cpu_data_array.c
+++ b/lib/el3_runtime/cpu_data_array.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,6 +8,14 @@
 
 #include <lib/cassert.h>
 #include <lib/el3_runtime/cpu_data.h>
+#include <plat/common/platform.h>
 
 /* The per_cpu_ptr_cache_t space allocation */
 cpu_data_t percpu_data[PLATFORM_CORE_COUNT];
+
+#ifndef __aarch64__
+cpu_data_t *_cpu_data(void)
+{
+	return _cpu_data_by_index(plat_my_core_pos());
+}
+#endif
diff --git a/lib/psci/psci_lib.mk b/lib/psci/psci_lib.mk
index 527ad3a..e1dbec2 100644
--- a/lib/psci/psci_lib.mk
+++ b/lib/psci/psci_lib.mk
@@ -1,11 +1,10 @@
 #
-# Copyright (c) 2016-2024, Arm Limited and Contributors. All rights reserved.
+# Copyright (c) 2016-2025, Arm Limited and Contributors. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
 PSCI_LIB_SOURCES	:=	lib/el3_runtime/cpu_data_array.c	\
-				lib/el3_runtime/${ARCH}/cpu_data.S	\
 				lib/el3_runtime/${ARCH}/context_mgmt.c	\
 				lib/cpus/${ARCH}/cpu_helpers.S		\
 				lib/cpus/errata_report.c		\
diff --git a/lib/psci/psci_setup.c b/lib/psci/psci_setup.c
index 0863a82..44c9bdb 100644
--- a/lib/psci/psci_setup.c
+++ b/lib/psci/psci_setup.c
@@ -63,8 +63,7 @@
 		/* Initialize with an invalid mpidr */
 		psci_cpu_pd_nodes[node_idx].mpidr = PSCI_INVALID_MPIDR;
 
-		svc_cpu_data =
-			&(_cpu_data_by_index(node_idx)->psci_svc_cpu_data);
+		svc_cpu_data = &get_cpu_data_by_index(node_idx, psci_svc_cpu_data);
 
 		/* Set the Affinity Info for the cores as OFF */
 		svc_cpu_data->aff_info_state = AFF_STATE_OFF;