/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/*
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since U-Boot
 * does not support stack unwinding, so that all of the functions below
 * can be kept without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

/*
 * ARM_DIV_BODY: core unsigned division loop.
 * In:  \dividend, \divisor (both non-zero; divisor not a power of two —
 *      callers handle those cases separately).
 * Out: \result = \dividend / \divisor, \dividend = remainder.
 * \curbit is a scratch register.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Use clz to align the divisor with the dividend in one step.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled 4 bits per iteration.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm

/*
 * ARM_DIV2_ORDER: compute the bit position of the (single) set bit of a
 * power-of-two divisor, i.e. \order = log2(\divisor).
 * In:  \divisor = power of two.
 * Out: \order.  \divisor is clobbered on pre-v5 cores.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit, 16/8/4 bits at a time.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm

/*
 * ARM_MOD_BODY: core unsigned modulo loop.
 * In:  \dividend, \divisor (both non-zero; divisor not a power of two —
 *      callers handle those cases separately).
 * Out: \dividend = \dividend % \divisor.
 * \order and \spare are scratch registers.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm

/*
 * unsigned __udivsi3 / __aeabi_uidiv(unsigned r0, unsigned r1)
 * In:  r0 = dividend, r1 = divisor.
 * Out: r0 = r0 / r1 (unsigned quotient).
 * Branches to Ldiv0 when r1 == 0.  Clobbers r2, r3, flags.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ fast path: divide by 1
	reteq	lr
	bcc	Ldiv0				@ r1 == 0 -> division by zero
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1				@ dividend == divisor -> 1
	movne	r0, #0				@ dividend <  divisor -> 0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2			@ divide by 2^r2 via shift
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection

/*
 * unsigned __umodsi3(unsigned r0, unsigned r1)
 * In:  r0 = dividend, r1 = divisor.
 * Out: r0 = r0 % r1 (unsigned remainder).
 * Branches to Ldiv0 when r1 == 0.  Clobbers r2, r3, flags.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection

/*
 * int __divsi3 / __aeabi_idiv(int r0, int r1)
 * In:  r0 = dividend, r1 = divisor (signed).
 * Out: r0 = r0 / r1 (signed quotient, truncated toward zero).
 * Branches to Ldiv0 when r1 == 0.  Clobbers r2, r3, ip, flags.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate result if signs differed
	ret	lr

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0			@ divisor was -1: negate dividend
	ret	lr

11:	movlo	r0, #0				@ |dividend| <  |divisor| -> 0
	moveq	r0, ip, asr #31			@ equal magnitude: result is
	orreq	r0, r0, #1			@ +1 or -1 depending on signs
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ divide by 2^r2 via shift
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection

/*
 * int __modsi3(int r0, int r1)
 * In:  r0 = dividend, r1 = divisor (signed).
 * Out: r0 = r0 % r1 (signed remainder; takes the sign of the dividend).
 * Branches to Ldiv0 when r1 == 0.  Clobbers r2, r3, ip, flags.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ restore sign of dividend
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection

/*
 * __aeabi_uidivmod(unsigned r0, unsigned r1)
 * In:  r0 = dividend, r1 = divisor.
 * Out: r0 = quotient, r1 = remainder (r1 = r0_in - quotient * r1_in).
 * EABI run-time helper; wraps __aeabi_uidiv and reconstructs the
 * remainder with a multiply-subtract.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3			@ remainder = dividend - q*divisor
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

/*
 * __aeabi_idivmod(int r0, int r1)
 * In:  r0 = dividend, r1 = divisor (signed).
 * Out: r0 = quotient, r1 = remainder.
 * EABI run-time helper; wraps __aeabi_idiv and reconstructs the
 * remainder with a multiply-subtract.
 * Note: section renamed from .text.__aeabi_uidivmod (copy-paste from the
 * unsigned variant) so --gc-sections can discard each helper separately.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3			@ remainder = dividend - q*divisor
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

/*
 * Ldiv0: common division-by-zero trap.
 * Calls __div0 (which may not return) and, if it does, returns 0.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!			@ push lr, keep sp 8-aligned
	bl	__div0
	mov	r0, #0				@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection

/* Thumb-1 specialities */
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi: Thumb-1 switch-table helper.
 * lr points just past the bl into a table of signed bytes; r0 is the
 * case index.  Adds twice the selected table entry to lr and returns
 * there.  Only r1 is used and it is preserved.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ clear Thumb bit to address
	lsls	r1, r1, #1			@ the table after the bl
	ldrsb	r1, [r1, r0]			@ fetch signed byte offset
	lsls	r1, r1, #1			@ offsets are in halfwords
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

/*
 * __gnu_thumb1_case_uqi: Thumb-1 switch-table helper for tables of
 * unsigned bytes.  Same contract as __gnu_thumb1_case_sqi but the
 * table entries are zero-extended.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ clear Thumb bit to address
	lsls	r1, r1, #1			@ the table after the bl
	ldrb	r1, [r1, r0]			@ fetch unsigned byte offset
	lsls	r1, r1, #1			@ offsets are in halfwords
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

/*
 * __gnu_thumb1_case_shi: Thumb-1 switch-table helper for tables of
 * signed halfwords.  r0 (the index) is scaled to a halfword offset;
 * r0 and r1 are preserved.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ clear Thumb bit to address
	lsls	r0, r0, #1			@ index -> byte offset
	lsls	r1, r1, #1			@ the table after the bl
	ldrsh	r1, [r1, r0]			@ fetch signed halfword offset
	lsls	r1, r1, #1			@ offsets are in halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

/*
 * __gnu_thumb1_case_uhi: Thumb-1 switch-table helper for tables of
 * unsigned halfwords.  Same contract as __gnu_thumb1_case_shi but the
 * table entries are zero-extended.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ clear Thumb bit to address
	lsls	r0, r0, #1			@ index -> byte offset
	lsls	r1, r1, #1			@ the table after the bl
	ldrh	r1, [r1, r0]			@ fetch unsigned halfword offset
	lsls	r1, r1, #1			@ offsets are in halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
#endif