pmf: Make the runtime instrumentation work on AArch32

Port the PMF asm macros, and the asm code in the BL31 entrypoint that
the runtime instrumentation needs, to AArch32.

Since smc dispatch is handled by the bl32 payload on AArch32, we
provide this service only if AARCH32_SP=sp_min is set.

Signed-off-by: Bence Szépkúti <bence.szepkuti@arm.com>
Change-Id: Id33b7e9762ae86a4f4b40d7f1b37a90e5130c8ac
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 74b0993..665a05e 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -9,7 +9,7 @@
 #include <arch.h>
 #include <common/bl_common.h>
 #include <el3_common_macros.S>
-#include <lib/pmf/pmf_asm_macros.S>
+#include <lib/pmf/aarch64/pmf_asm_macros.S>
 #include <lib/runtime_instr.h>
 #include <lib/xlat_tables/xlat_mmu_helpers.h>
 
diff --git a/bl32/sp_min/aarch32/entrypoint.S b/bl32/sp_min/aarch32/entrypoint.S
index 0a68475..f3a1e44 100644
--- a/bl32/sp_min/aarch32/entrypoint.S
+++ b/bl32/sp_min/aarch32/entrypoint.S
@@ -10,6 +10,9 @@
 #include <common/runtime_svc.h>
 #include <context.h>
 #include <el3_common_macros.S>
+#include <lib/el3_runtime/cpu_data.h>
+#include <lib/pmf/aarch32/pmf_asm_macros.S>
+#include <lib/runtime_instr.h>
 #include <lib/xlat_tables/xlat_tables_defs.h>
 #include <smccc_helpers.h>
 #include <smccc_macros.S>
@@ -164,6 +167,20 @@
 	/* On SMC entry, `sp` points to `smc_ctx_t`. Save `lr`. */
 	str	lr, [sp, #SMC_CTX_LR_MON]
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/*
+	 * Read the timestamp and push it onto the C runtime stack (SP_MON in
+	 * smc_ctx_t); r0/r1 are stashed in the context meanwhile. It is moved
+	 * to per-cpu data once that stack is live, as _cpu_data() needs one.
+	 */
+	strd	r0, r1, [sp, #SMC_CTX_GPREG_R0]
+	ldcopr16 r0, r1, CNTPCT_64
+	ldr	lr, [sp, #SMC_CTX_SP_MON]
+	strd	r0, r1, [lr, #-8]!
+	str	lr, [sp, #SMC_CTX_SP_MON]
+	ldrd	r0, r1, [sp, #SMC_CTX_GPREG_R0]
+#endif
+
 	smccc_save_gp_mode_regs
 
 	clrex_on_monitor_entry
@@ -175,6 +192,23 @@
 	mov	r2, sp				/* handle */
 	ldr	sp, [r2, #SMC_CTX_SP_MON]
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/* Save handle to a callee saved register */
+	mov	r6, r2
+
+	/*
+	 * Restore the timestamp value and store it in per-cpu data. The value
+	 * will be extracted from per-cpu data by the C level SMC handler and
+	 * saved to the PMF timestamp region.
+	 */
+	ldrd	r4, r5, [sp], #8
+	bl	_cpu_data
+	strd	r4, r5, [r0, #CPU_DATA_PMF_TS0_OFFSET]
+
+	/* Restore handle */
+	mov	r2, r6
+#endif
+
 	ldr	r0, [r2, #SMC_CTX_SCR]
 	and	r3, r0, #SCR_NS_BIT		/* flags */
 
@@ -239,6 +273,16 @@
  * The Warm boot entrypoint for SP_MIN.
  */
 func sp_min_warm_entrypoint
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/*
+	 * This timestamp update happens with cache off.  The next
+	 * timestamp collection will need to do cache maintenance prior
+	 * to timestamp update.
+	 */
+	pmf_calc_timestamp_addr rt_instr_svc, RT_INSTR_EXIT_HW_LOW_PWR
+	ldcopr16 r2, r3, CNTPCT_64
+	strd	r2, r3, [r0]
+#endif
 	/*
 	 * On the warm boot path, most of the EL3 initialisations performed by
 	 * 'el3_entrypoint_common' must be skipped:
@@ -295,6 +339,30 @@
 	bl	smc_get_next_ctx
 	/* r0 points to `smc_ctx_t` */
 	/* The PSCI cpu_context registers have been copied to `smc_ctx_t` */
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	/* Save smc_ctx_t */
+	mov	r5, r0
+
+	pmf_calc_timestamp_addr rt_instr_svc, RT_INSTR_EXIT_PSCI
+	mov	r4, r0
+
+	/*
+	 * Invalidate before updating timestamp to ensure previous timestamp
+	 * updates on the same cache line with caches disabled are properly
+	 * seen by the same core. Without the cache invalidate, the core might
+	 * write into a stale cache line.
+	 */
+	mov	r1, #PMF_TS_SIZE
+	bl	inv_dcache_range
+
+	ldcopr16 r0, r1, CNTPCT_64
+	strd	r0, r1, [r4]
+
+	/* Restore smc_ctx_t */
+	mov	r0, r5
+#endif
+
 	b	sp_min_exit
 endfunc sp_min_warm_entrypoint
 
diff --git a/bl32/sp_min/sp_min.ld.S b/bl32/sp_min/sp_min.ld.S
index 4559903..6997a7f 100644
--- a/bl32/sp_min/sp_min.ld.S
+++ b/bl32/sp_min/sp_min.ld.S
@@ -55,6 +55,14 @@
         KEEP(*(rt_svc_descs))
         __RT_SVC_DESCS_END__ = .;
 
+#if ENABLE_PMF
+        /* Ensure 4-byte alignment for descriptors and ensure inclusion */
+        . = ALIGN(4);
+        __PMF_SVC_DESCS_START__ = .;
+        KEEP(*(pmf_svc_descs))
+        __PMF_SVC_DESCS_END__ = .;
+#endif /* ENABLE_PMF */
+
         /*
          * Ensure 4-byte alignment for cpu_ops so that its fields are also
          * aligned. Also ensure cpu_ops inclusion.
diff --git a/bl32/sp_min/sp_min_main.c b/bl32/sp_min/sp_min_main.c
index f39e33b..f050160 100644
--- a/bl32/sp_min/sp_min_main.c
+++ b/bl32/sp_min/sp_min_main.c
@@ -19,7 +19,9 @@
 #include <context.h>
 #include <drivers/console.h>
 #include <lib/el3_runtime/context_mgmt.h>
+#include <lib/pmf/pmf.h>
 #include <lib/psci/psci.h>
+#include <lib/runtime_instr.h>
 #include <lib/utils.h>
 #include <plat/common/platform.h>
 #include <platform_sp_min.h>
@@ -28,6 +30,11 @@
 
 #include "sp_min_private.h"
 
+#if ENABLE_RUNTIME_INSTRUMENTATION
+PMF_REGISTER_SERVICE_SMC(rt_instr_svc, PMF_RT_INSTR_SVC_ID,
+	RT_INSTR_TOTAL_IDS, PMF_STORE_ENABLE)
+#endif
+
 /* Pointers to per-core cpu contexts */
 static void *sp_min_cpu_ctx_ptr[PLATFORM_CORE_COUNT];
 
diff --git a/include/lib/pmf/aarch32/pmf_asm_macros.S b/include/lib/pmf/aarch32/pmf_asm_macros.S
new file mode 100644
index 0000000..1dbb408
--- /dev/null
+++ b/include/lib/pmf/aarch32/pmf_asm_macros.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef PMF_ASM_MACROS_S
+#define PMF_ASM_MACROS_S
+
+#define PMF_TS_SIZE	8
+
+	/*
+	 * Calculates, in r0, the address of the per-cpu timestamp for the
+	 * given service name and local timestamp id. lr is kept in r4 across
+	 * the call to plat_my_core_pos and restored. Clobbers: r0 - r4
+	 */
+	.macro pmf_calc_timestamp_addr _name, _tid
+	mov	r4, lr
+	bl	plat_my_core_pos
+	mov	lr, r4
+	ldr	r1, =__PERCPU_TIMESTAMP_SIZE__
+	mov	r2, #(\_tid * PMF_TS_SIZE)
+	mla	r0, r0, r1, r2
+	ldr	r1, =pmf_ts_mem_\_name
+	add	r0, r0, r1
+	.endm
+
+#endif /* PMF_ASM_MACROS_S */
diff --git a/include/lib/pmf/pmf_asm_macros.S b/include/lib/pmf/aarch64/pmf_asm_macros.S
similarity index 100%
rename from include/lib/pmf/pmf_asm_macros.S
rename to include/lib/pmf/aarch64/pmf_asm_macros.S
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index ccb851e..cda39b7 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -222,6 +222,9 @@
 BL31_SOURCES		+=	plat/arm/common/aarch64/execution_state_switch.c\
 				plat/arm/common/arm_sip_svc.c			\
 				lib/pmf/pmf_smc.c
+else
+BL32_SOURCES		+=	plat/arm/common/arm_sip_svc.c			\
+				lib/pmf/pmf_smc.c
 endif
 endif