/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *	- contributed to gcc-3.4 on Sep 30, 2003
 *	- adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */
13
14
15#include <linux/linkage.h>
16#include <asm/assembler.h>
17
18/*
19 * U-Boot compatibility bit, define empty UNWIND() macro as, since we
20 * do not support stack unwinding and define CONFIG_AEABI to make all
21 * of the functions available without diverging from Linux code.
22 */
23#ifdef __UBOOT__
24#define UNWIND(x...)
25#define CONFIG_AEABI
26#endif
27
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit division.  On exit \result holds the
 * quotient \dividend / \divisor and \dividend has been reduced to the
 * remainder.  \divisor and \curbit are clobbered (used as shifted work
 * values).  Callers handle divisor == 0, divisor == 1 and power-of-two
 * divisors separately as fast paths (see __udivsi3 / __divsi3 below).
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ With CLZ available, align the divisor's top bit with the
	@ dividend's top bit in one step; \curbit marks the quotient
	@ bit that corresponds to the shifted divisor.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled to handle four quotient bits per
	@ iteration (matching the nibble-wise alignment above).
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm
92
93
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute \order = log2(\divisor) for a power-of-two \divisor, so
 * callers can divide by simply shifting right by \order.
 * The pre-ARMv5 path clobbers \divisor; the CLZ path leaves it intact.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31	@ order = 31 - clz = index of set bit

#else

	@ Without CLZ: binary-search the set bit, halving the candidate
	@ range from 16 bits down to the last couple of bits.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
123
124
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Reduce \dividend to \dividend % \divisor in place.  \order, \spare
 * and \divisor are clobbered; \order tracks how far the divisor was
 * shifted up (i.e. how many subtraction steps remain).
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor's top bit with the dividend's top bit;
	@ \order = number of bit positions the divisor was shifted up.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	@ Done early if no steps remain or the dividend hit zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
194
195
/*
 * unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
 * AEABI alias: __aeabi_uidiv
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 * Division by zero branches to Ldiv0.  Clobbers r2, r3 and flags.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1	@ r2 = divisor - 1; Z set if divisor == 1
	reteq	lr		@ divide by 1: quotient is the dividend
	bcc	Ldiv0		@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f		@ dividend <= divisor: result is 0 or 1
	tst	r1, r2		@ divisor & (divisor - 1) == 0 ?
	beq	12f		@ power of two: divide by shifting

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1		@ dividend == divisor -> 1
	movne	r0, #0		@ dividend <  divisor -> 0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2	@ shift by log2(divisor)
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200227
/*
 * unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder
 * Division by zero branches to Ldiv0.  Clobbers r2, r3 and flags.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1	@ compare divisor with 1
	bcc	Ldiv0		@ borrow: divisor was 0
	cmpne	r0, r1		@ compare dividend with divisor
	moveq	r0, #0		@ divisor == 1 or dividend == divisor: rem = 0
	tsthi	r1, r2		@ see if divisor is power of 2
	andeq	r0, r0, r2	@ if so, rem = dividend & (divisor - 1)
	retls	lr		@ done unless dividend > non-power-of-2 divisor

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200247
/*
 * int __divsi3(int dividend, int divisor)
 * AEABI alias: __aeabi_idiv
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient (truncated toward zero)
 * Division by zero branches to Ldiv0.  Clobbers r2, r3, ip and flags.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1		@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0		@ positive dividend value
	cmp	r3, r1
	bls	11f			@ |dividend| <= |divisor|: 0 or +/-1
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0		@ signs differed: negate quotient
	ret	lr

10:	teq	ip, r0			@ same sign ? (mi: divisor was -1)
	rsbmi	r0, r0, #0
	ret	lr

11:	movlo	r0, #0			@ smaller magnitude: quotient 0
	moveq	r0, ip, asr #31		@ equal magnitude: +/-1 from sign word
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2		@ shift |dividend| by log2(divisor)
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200292
/*
 * int __modsi3(int dividend, int divisor)
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder (carries the sign of the dividend)
 * Division by zero branches to Ldiv0.  Clobbers r2, r3, ip and flags.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0	@ loops below use unsigned.
	movs	ip, r0		@ preserve sign of dividend
	rsbmi	r0, r0, #0	@ if negative make positive
	subs	r2, r1, #1	@ compare divisor with 1
	cmpne	r0, r1		@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2		@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0		@ was the dividend negative?
	rsbmi	r0, r0, #0	@ then negate the remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200318
319#ifdef CONFIG_AEABI
320
/*
 * AEABI unsigned divmod: {quotient, remainder} = r0 {/, %} r1
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient, r1 = remainder
 * Clobbers r2, r3, ip and flags.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )

	stmfd	sp!, {r0, r1, ip, lr}	@ keep original operands across the call
	bl	__aeabi_uidiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = dividend - r3 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200336
/*
 * AEABI signed divmod: {quotient, remainder} = r0 {/, %} r1
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient, r1 = remainder
 * Clobbers r2, r3, ip and flags.
 *
 * Fix: the section was previously named .text.__aeabi_uidivmod (a
 * copy-paste of the unsigned variant above), which merged both
 * routines into one per-function section and defeated per-symbol
 * section placement/garbage collection.  It must match this symbol.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )

	stmfd	sp!, {r0, r1, ip, lr}	@ keep original operands across the call
	bl	__aeabi_idiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = dividend - r3 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200352
353#endif
354
/*
 * Common divide-by-zero trap: calls the C handler __div0() and, if it
 * returns, hands 0 back to the original caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!		@ push lr; 8-byte slot keeps sp aligned
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop return address straight into pc

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200369
370/* Thumb-1 specialities */
Tom Rini1c640a62017-03-18 09:01:44 -0400371#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * libgcc helper for Thumb-1 switch dispatch via a table of signed
 * byte offsets based at the return address.
 * In: r0 = case index; lr = return address (table base | Thumb bit).
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1	@ clear the Thumb bit -> table address
	ldrsb	r1, [r1, r0]	@ fetch signed byte offset for this case
	lsls	r1, r1, #1	@ offsets count halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr		@ branch to the selected case
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200385
/*
 * libgcc helper for Thumb-1 switch dispatch via a table of unsigned
 * byte offsets based at the return address.
 * In: r0 = case index; lr = return address (table base | Thumb bit).
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1	@ clear the Thumb bit -> table address
	ldrb	r1, [r1, r0]	@ fetch unsigned byte offset for this case
	lsls	r1, r1, #1	@ offsets count halfwords
	add	lr, lr, r1
	pop	{r1}
	bx	lr		@ branch to the selected case
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200399
/*
 * libgcc helper for Thumb-1 switch dispatch via a table of signed
 * halfword offsets based at the return address.
 * In: r0 = case index; lr = return address (table base | Thumb bit).
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1	@ scale index: table entries are halfwords
	lsls	r1, r1, #1	@ clear the Thumb bit -> table address
	ldrsh	r1, [r1, r0]	@ fetch signed halfword offset for this case
	lsls	r1, r1, #1	@ offsets count halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr		@ branch to the selected case
ENDPROC(__gnu_thumb1_case_shi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200414
/*
 * libgcc helper for Thumb-1 switch dispatch via a table of unsigned
 * halfword offsets based at the return address.
 * In: r0 = case index; lr = return address (table base | Thumb bit).
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1	@ scale index: table entries are halfwords
	lsls	r1, r1, #1	@ clear the Thumb bit -> table address
	ldrh	r1, [r1, r0]	@ fetch unsigned halfword offset for this case
	lsls	r1, r1, #1	@ offsets count halfwords
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr		@ branch to the selected case
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200429#endif