armv8: generic_timer: Use event stream for udelay
Polling cntpct_el0 in a tight loop for delays is inefficient.
This is particularly apparent on Arm FVPs, which do not simulate
real time, meaning that a 1s sleep can take a couple of orders
of magnitude longer to execute in wall time.
If running at EL2 or above (where CNTHCTL_EL2 is available), enable
the cntpct_el0 event stream temporarily and use wfe to implement
the delay more efficiently. The event period is chosen as a
trade-off between efficiency and the fact that Arm FVPs do not
typically simulate real time.
This is only implemented for Armv8 boards, where an architectural
timer exists, and only enabled by default for the ARCH_VEXPRESS64
board family.
Signed-off-by: Peter Hoyes <Peter.Hoyes@arm.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/arch/arm/cpu/armv8/generic_timer.c b/arch/arm/cpu/armv8/generic_timer.c
index e4aa5a4..1de7ec5 100644
--- a/arch/arm/cpu/armv8/generic_timer.c
+++ b/arch/arm/cpu/armv8/generic_timer.c
@@ -114,3 +114,30 @@
return val / get_tbclk();
}
+
+#if CONFIG_IS_ENABLED(ARMV8_UDELAY_EVENT_STREAM)
+void __udelay(unsigned long usec)
+{
+ u64 target = get_ticks() + usec_to_tick(usec);
+
+ /* At EL2 or above, use the event stream to avoid polling CNTPCT_EL0 so often */
+ if (current_el() >= 2) {
+ u32 cnthctl_val;
+ const u8 event_period = 0x7;
+
+ asm volatile("mrs %0, cnthctl_el2" : "=r" (cnthctl_val));
+ asm volatile("msr cnthctl_el2, %0" : : "r"
+ (cnthctl_val | CNTHCTL_EL2_EVNT_EN | CNTHCTL_EL2_EVNT_I(event_period)));
+
+ while (get_ticks() + (1ULL << event_period) <= target)
+ wfe();
+
+ /* Reset the event stream */
+ asm volatile("msr cnthctl_el2, %0" : : "r" (cnthctl_val));
+ }
+
+ /* Fall back to polling CNTPCT_EL0 */
+ while (get_ticks() <= target)
+ ;
+}
+#endif