perf(cpus): inline cpu_rev_var checks
We strive to apply errata as close to reset as possible with as few
things enabled as possible. Importantly, the I-cache will not be
enabled. This means that repeated branches to these tiny functions must
be re-fetched all the way from memory each time which has glacial speed.
Cores are allowed to fetch things ahead of time though as long as
execution is fairly linear. So we can trade a little bit of space (3 to
7 instructions per erratum) to keep things linear and not have to go to
memory.
While we're at it, optimise the cpu_rev_var_{ls, hs, range}
functions to take up less space. Dropping the moves allows for a bit of
assembly magic that produces the same result in 2 and 4 instructions
respectively.
Change-Id: I51608352f23b2244ea7a99e76c10892d257f12bf
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index ac26fd7..5a8219e 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -479,8 +479,35 @@
.endm
/*
- * Helpers to select which revisions errata apply to. Don't leave a link
- * register as the cpu_rev_var_*** will call the ret and we can save on one.
+ * Helpers to report if an erratum applies. Compares the given revision variant
+ * to the given value. Return ERRATA_APPLIES or ERRATA_NOT_APPLIES accordingly.
+ *
+ * _rev_num: the given revision variant. Or
+ * _rev_num_lo,_rev_num_hi: the lower and upper bounds of the revision variant
+ *
+ * in body:
+ * clobber: x0 (cpu_rev_var_range also clobbers x1)
+ * argument: x0 - cpu_rev_var
+ */
+.macro cpu_rev_var_ls _rev_num:req
+ cmp x0, #\_rev_num
+ cset x0, ls
+.endm
+
+.macro cpu_rev_var_hs _rev_num:req
+ cmp x0, #\_rev_num
+ cset x0, hs
+.endm
+
+.macro cpu_rev_var_range _rev_num_lo:req, _rev_num_hi:req
+ cmp x0, #\_rev_num_lo
+ mov x1, #\_rev_num_hi
+ ccmp x0, x1, #2, hs
+ cset x0, ls
+.endm
+
+/*
+ * Helpers to select which revisions errata apply to.
*
* _cpu:
* Name of cpu as given to declare_cpu_ops
@@ -496,28 +523,27 @@
* Revision to apply to
*
* in body:
- * clobber: x0 to x4
+ * clobber: x0 to x1
* argument: x0 - cpu_rev_var
*/
.macro check_erratum_ls _cpu:req, _cve:req, _id:req, _rev_num:req
func check_erratum_\_cpu\()_\_id
- mov x1, #\_rev_num
- b cpu_rev_var_ls
+ cpu_rev_var_ls \_rev_num
+ ret
endfunc check_erratum_\_cpu\()_\_id
.endm
.macro check_erratum_hs _cpu:req, _cve:req, _id:req, _rev_num:req
func check_erratum_\_cpu\()_\_id
- mov x1, #\_rev_num
- b cpu_rev_var_hs
+ cpu_rev_var_hs \_rev_num
+ ret
endfunc check_erratum_\_cpu\()_\_id
.endm
.macro check_erratum_range _cpu:req, _cve:req, _id:req, _rev_num_lo:req, _rev_num_hi:req
func check_erratum_\_cpu\()_\_id
- mov x1, #\_rev_num_lo
- mov x2, #\_rev_num_hi
- b cpu_rev_var_range
+ cpu_rev_var_range \_rev_num_lo, \_rev_num_hi
+ ret
endfunc check_erratum_\_cpu\()_\_id
.endm
@@ -532,7 +558,10 @@
endfunc check_erratum_\_cpu\()_\_id
.endm
-/* provide a shorthand for the name format for annoying errata */
+/*
+ * provide a shorthand for the name format for annoying errata
+ * body: clobber x0 to x3
+ */
.macro check_erratum_custom_start _cpu:req, _cve:req, _id:req
func check_erratum_\_cpu\()_\_id
.endm