pmf: Make the runtime instrumentation work on AArch32

Port the PMF asm macros, and the runtime instrumentation asm code from
the BL31 entrypoint, to AArch32.

Since SMC dispatch on AArch32 is handled by the BL32 payload, this
service is provided only when AARCH32_SP=sp_min is set.

Signed-off-by: Bence Szépkúti <bence.szepkuti@arm.com>
Change-Id: Id33b7e9762ae86a4f4b40d7f1b37a90e5130c8ac
diff --git a/bl32/sp_min/aarch32/entrypoint.S b/bl32/sp_min/aarch32/entrypoint.S
index 0a68475..f3a1e44 100644
--- a/bl32/sp_min/aarch32/entrypoint.S
+++ b/bl32/sp_min/aarch32/entrypoint.S
@@ -10,6 +10,9 @@
 #include <common/runtime_svc.h>
 #include <context.h>
 #include <el3_common_macros.S>
+#include <lib/el3_runtime/cpu_data.h>
+#include <lib/pmf/aarch32/pmf_asm_macros.S>
+#include <lib/runtime_instr.h>
 #include <lib/xlat_tables/xlat_tables_defs.h>
 #include <smccc_helpers.h>
 #include <smccc_macros.S>
@@ -164,6 +167,20 @@
 	/* On SMC entry, `sp` points to `smc_ctx_t`. Save `lr`. */
 	str	lr, [sp, #SMC_CTX_LR_MON]
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/*
+	 * Read the timestamp value and push it onto the C runtime stack whose
+	 * pointer is held in SMC_CTX_SP_MON. The value will be saved to the
+	 * per-cpu data later, as a valid stack is needed to call _cpu_data().
+	 */
+	strd	r0, r1, [sp, #SMC_CTX_GPREG_R0]
+	ldcopr16 r0, r1, CNTPCT_64
+	ldr	lr, [sp, #SMC_CTX_SP_MON]
+	strd	r0, r1, [lr, #-8]!
+	str	lr, [sp, #SMC_CTX_SP_MON]
+	ldrd	r0, r1, [sp, #SMC_CTX_GPREG_R0]
+#endif
+
 	smccc_save_gp_mode_regs
 
 	clrex_on_monitor_entry
@@ -175,6 +192,23 @@
 	mov	r2, sp				/* handle */
 	ldr	sp, [r2, #SMC_CTX_SP_MON]
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/* Save handle to a callee saved register */
+	mov	r6, r2
+
+	/*
+	 * Restore the timestamp value and store it in per-cpu data. The value
+	 * will be extracted from per-cpu data by the C level SMC handler and
+	 * saved to the PMF timestamp region.
+	 */
+	ldrd	r4, r5, [sp], #8
+	bl	_cpu_data
+	strd	r4, r5, [r0, #CPU_DATA_PMF_TS0_OFFSET]
+
+	/* Restore handle */
+	mov	r2, r6
+#endif
+
 	ldr	r0, [r2, #SMC_CTX_SCR]
 	and	r3, r0, #SCR_NS_BIT		/* flags */
 
@@ -239,6 +273,16 @@
  * The Warm boot entrypoint for SP_MIN.
  */
 func sp_min_warm_entrypoint
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/*
+	 * This timestamp update happens with cache off.  The next
+	 * timestamp collection will need to do cache maintenance prior
+	 * to timestamp update.
+	 */
+	pmf_calc_timestamp_addr rt_instr_svc, RT_INSTR_EXIT_HW_LOW_PWR
+	ldcopr16 r2, r3, CNTPCT_64
+	strd	r2, r3, [r0]
+#endif
 	/*
 	 * On the warm boot path, most of the EL3 initialisations performed by
 	 * 'el3_entrypoint_common' must be skipped:
@@ -295,6 +339,30 @@
 	bl	smc_get_next_ctx
 	/* r0 points to `smc_ctx_t` */
 	/* The PSCI cpu_context registers have been copied to `smc_ctx_t` */
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/* Save smc_ctx_t */
+	mov	r5, r0
+
+	pmf_calc_timestamp_addr rt_instr_svc, RT_INSTR_EXIT_PSCI
+	mov	r4, r0
+
+	/*
+	 * Invalidate before updating timestamp to ensure previous timestamp
+	 * updates on the same cache line with caches disabled are properly
+	 * seen by the same core. Without the cache invalidate, the core might
+	 * write into a stale cache line.
+	 */
+	mov	r1, #PMF_TS_SIZE
+	bl	inv_dcache_range
+
+	ldcopr16 r0, r1, CNTPCT_64
+	strd	r0, r1, [r4]
+
+	/* Restore smc_ctx_t */
+	mov	r0, r5
+#endif
+
 	b	sp_min_exit
 endfunc sp_min_warm_entrypoint
 
diff --git a/bl32/sp_min/sp_min.ld.S b/bl32/sp_min/sp_min.ld.S
index 4559903..6997a7f 100644
--- a/bl32/sp_min/sp_min.ld.S
+++ b/bl32/sp_min/sp_min.ld.S
@@ -55,6 +55,14 @@
         KEEP(*(rt_svc_descs))
         __RT_SVC_DESCS_END__ = .;
 
+#if ENABLE_PMF
+        /* Ensure 4-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(4);
+        __PMF_SVC_DESCS_START__ = .;
+        KEEP(*(pmf_svc_descs))
+        __PMF_SVC_DESCS_END__ = .;
+#endif /* ENABLE_PMF */
+
         /*
          * Ensure 4-byte alignment for cpu_ops so that its fields are also
          * aligned. Also ensure cpu_ops inclusion.
diff --git a/bl32/sp_min/sp_min_main.c b/bl32/sp_min/sp_min_main.c
index f39e33b..f050160 100644
--- a/bl32/sp_min/sp_min_main.c
+++ b/bl32/sp_min/sp_min_main.c
@@ -19,7 +19,9 @@
 #include <context.h>
 #include <drivers/console.h>
 #include <lib/el3_runtime/context_mgmt.h>
+#include <lib/pmf/pmf.h>
 #include <lib/psci/psci.h>
+#include <lib/runtime_instr.h>
 #include <lib/utils.h>
 #include <plat/common/platform.h>
 #include <platform_sp_min.h>
@@ -28,6 +30,11 @@
 
 #include "sp_min_private.h"
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+PMF_REGISTER_SERVICE_SMC(rt_instr_svc, PMF_RT_INSTR_SVC_ID,
+	RT_INSTR_TOTAL_IDS, PMF_STORE_ENABLE)
+#endif
+
 /* Pointers to per-core cpu contexts */
 static void *sp_min_cpu_ctx_ptr[PLATFORM_CORE_COUNT];