armv8: Reduce exception handling code
The arm64 exception handling code is quite big, mostly due to
architectural alignment requirements. Each exception entry spans 32
instructions, which sounds generous, but is too small to fit all of the
save/branch/restore code in there. So at the moment we use only four
instructions, branching into shared save and restore routines.
To not leave the space for those remaining 28 instructions wasted, let's
split the save and restore routines and stuff them into the gaps.
This saves about 250 bytes of code, which is helpful for those tight
SPLs.
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S
index 1a78a5d..a15af72 100644
--- a/arch/arm/cpu/armv8/exceptions.S
+++ b/arch/arm/cpu/armv8/exceptions.S
@@ -11,7 +11,26 @@
#include <linux/linkage.h>
/*
- * Exception vectors.
+ * AArch64 exception vectors:
+ * We have four types of exceptions:
+ * - synchronous: traps, data aborts, undefined instructions, ...
+ * - IRQ: group 1 (normal) interrupts
+ * - FIQ: group 0 or secure interrupts
+ * - SError: fatal system errors
+ * There are entries for all four of those for different contexts:
+ * - from same exception level, when using the SP_EL0 stack pointer
+ * - from same exception level, when using the SP_ELx stack pointer
+ * - from lower exception level, when this is AArch64
+ * - from lower exception level, when this is AArch32
+ * Each of those 16 entries have space for 32 instructions, each entry must
+ * be 128 byte aligned, the whole table must be 2K aligned.
+ * The 32 instructions are not enough to save and restore all registers and
+ * to branch to the actual handler, so we split this up:
+ * Each entry saves the LR, branches to the save routine, then to the actual
+ * handler, then to the restore routine. The save and restore routines are
+ * each split in half and stuffed in the unused gap between the entries.
+ * Also as we do not run anything in a lower exception level, we just provide
+ * the first 8 entries for exceptions from the same EL.
*/
.align 11
.globl vectors
@@ -22,52 +41,9 @@
bl do_bad_sync
b exception_exit
- .align 7 /* Current EL IRQ Thread */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_bad_irq
- b exception_exit
-
- .align 7 /* Current EL FIQ Thread */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_bad_fiq
- b exception_exit
-
- .align 7 /* Current EL Error Thread */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_bad_error
- b exception_exit
-
- .align 7 /* Current EL Synchronous Handler */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_sync
- b exception_exit
-
- .align 7 /* Current EL IRQ Handler */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_irq
- b exception_exit
-
- .align 7 /* Current EL FIQ Handler */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_fiq
- b exception_exit
-
- .align 7 /* Current EL Error Handler */
- stp x29, x30, [sp, #-16]!
- bl _exception_entry
- bl do_error
- b exception_exit
-
/*
- * Enter Exception.
- * This will save the processor state that is ELR/X0~X30
- * to the stack frame.
+ * Save (most of) the GP registers to the stack frame.
+ * This is the first part of the shared routine called into from all entries.
*/
_exception_entry:
stp x27, x28, [sp, #-16]!
@@ -84,7 +60,19 @@
stp x5, x6, [sp, #-16]!
stp x3, x4, [sp, #-16]!
stp x1, x2, [sp, #-16]!
+ b _save_el_regs /* jump to the second part */
+ .align 7 /* Current EL IRQ Thread */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_bad_irq
+ b exception_exit
+
+/*
+ * Save exception specific context: ESR and ELR, for all exception levels.
+ * This is the second part of the shared routine called into from all entries.
+ */
+_save_el_regs:
/* Could be running at EL3/EL2/EL1 */
switch_el x11, 3f, 2f, 1f
3: mrs x1, esr_el3
@@ -100,16 +88,36 @@
mov x0, sp
ret
-
+ .align 7 /* Current EL FIQ Thread */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_bad_fiq
+ /* falling through to _exception_exit */
+/*
+ * Restore the exception return address, for all exception levels.
+ * This is the first part of the shared routine called into from all entries.
+ */
exception_exit:
ldp x2, x0, [sp],#16
switch_el x11, 3f, 2f, 1f
3: msr elr_el3, x2
- b 0f
+ b _restore_regs
2: msr elr_el2, x2
- b 0f
+ b _restore_regs
1: msr elr_el1, x2
-0:
+ b _restore_regs /* jump to the second part */
+
+ .align 7 /* Current EL Error Thread */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_bad_error
+ b exception_exit
+
+/*
+ * Restore the general purpose registers from the exception stack, then return.
+ * This is the second part of the shared routine called into from all entries.
+ */
+_restore_regs:
ldp x1, x2, [sp],#16
ldp x3, x4, [sp],#16
ldp x5, x6, [sp],#16
@@ -126,3 +134,27 @@
ldp x27, x28, [sp],#16
ldp x29, x30, [sp],#16
eret
+
+ .align 7 /* Current EL (SP_ELx) Synchronous Handler */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_sync
+ b exception_exit
+
+ .align 7 /* Current EL (SP_ELx) IRQ Handler */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_irq
+ b exception_exit
+
+ .align 7 /* Current EL (SP_ELx) FIQ Handler */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_fiq
+ b exception_exit
+
+ .align 7 /* Current EL (SP_ELx) Error Handler */
+ stp x29, x30, [sp, #-16]!
+ bl _exception_entry
+ bl do_error
+ b exception_exit