blob: 0798d098afe8c0509ec2721a901b71f3b157fe73 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/*
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 */
12
Marek Vasut14fefa02016-05-26 18:01:40 +020013#include <linux/linkage.h>
14#include <asm/assembler.h>
15
/*
 * U-Boot compatibility: stack unwinding is not supported here, so UNWIND()
 * is defined as an empty macro, and CONFIG_AEABI is defined so that all of
 * the functions are available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define CONFIG_AEABI
#define UNWIND(x...)
#endif
25
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core. On exit \result holds the
 * quotient; \dividend, \divisor and \curbit are clobbered.
 *
 * Both users in this file branch away before expanding the macro when the
 * divisor is zero or one, when dividend <= divisor, or when the divisor is
 * a power of two, so the body is only reached with dividend > divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift count = clz(divisor) - clz(dividend): line the top set bit
	@ of the divisor up with the top set bit of the dividend, and start
	@ curbit at 1 shifted left by the same amount.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ If the top three bits of the divisor are clear, pre-shift it left
	@ by 3 and start curbit at 8.  curbit then sits at the left end of
	@ a 4-bit nibble in the division loop, which saves one pass through
	@ the loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ While the divisor is below 0x10000000 and below the dividend,
	@ move divisor and curbit up four bits at a time, since four bits
	@ is the amount of unrolling in the division loop.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ Finish one bit at a time, otherwise a very big divisor would be
	@ in danger of overflowing.
2:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	2b

	mov	\result, #0

#endif

	@ Division loop: try divisor, divisor/2, divisor/4 and divisor/8
	@ against what is left of the dividend, subtracting and setting
	@ the matching curbit position in the result whenever one fits.
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Nothing left: stop early
	movsne	\curbit, \curbit, lsr #4	@ Else next nibble, Z once curbit is used up
	movne	\divisor, \divisor, lsr #4
	bne	3b

.endm
90
91
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * For a power-of-two \divisor, leaves log2(\divisor) in \order, i.e. the
 * right-shift count that replaces the division. Both call sites in this
 * file expand it only after "tst divisor, divisor - 1" produced zero.
 * The pre-ARMv5 variant shifts \divisor down while searching and so
 * clobbers it.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	@ order = 31 - clz(divisor)
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: 16, then 8, then 4 bits at a time,
	@ moving the divisor down and accumulating the distance in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ Last nibble: above 4 adds 3, otherwise 1, 2 and 4 add
	@ divisor >> 1, which is 0, 1 and 2 respectively.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
121
122
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core. The divisor is first shifted left (\order counts
 * by how many bits), then compared against and subtracted from \dividend
 * while being shifted back down. On exit \dividend holds the remainder;
 * \divisor and \order are clobbered, and \spare is clobbered by the
 * ARMv5+ variant.
 *
 * Both users in this file handle a zero divisor, dividend <= divisor and
 * power-of-two divisors themselves before expanding the macro.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend), and the divisor is moved
	@ up by that many bits.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ While the divisor is below 0x10000000 and below the dividend,
	@ move it up four bits at a time, since four bits is the amount
	@ of unrolling in the main loop.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ Finish one bit at a time, otherwise a very big divisor would be
	@ in danger of overflowing.
2:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	2b

#endif

	@ Perform all needed subtractions to keep only the remainder,
	@ starting with batches of four compare/subtract steps.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	4f

3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ LT here skips the subsge and ends the loop
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	3b

	@ Skip the tail when the low two bits of order are clear or the
	@ dividend is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	7f

	@ Either 1, 2 or 3 compare/subtract steps are left:
	@ order below -2 means one, order equal to -2 means two.
4:	cmn	\order, #2
	blt	6f
	beq	5f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
5:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
6:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
7:
.endm
192
193
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 * May clobber r1, r2, r3 and the flags. A zero divisor goes to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor is 1, r0 already is the quotient
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	.Ludiv_not_above		@ dividend <= divisor
	tst	r1, r2
	beq	.Ludiv_pow2			@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

.Ludiv_not_above:				@ flags are still those of cmp r0, r1
	movne	r0, #0				@ dividend < divisor
	moveq	r0, #1				@ dividend == divisor
	ret	lr

.Ludiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200225
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder
 * May clobber r1, r2, r3 and the flags. A zero divisor goes to Ldiv0.
 *
 * The conditional instructions after the first subs form one flag chain;
 * their order must not be changed.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1, Z if divisor is 1
	bcc	Ldiv0				@ borrow: divisor was 0
	cmpne	r0, r1				@ otherwise compare dividend with divisor
	moveq	r0, #0				@ divisor is 1 or equals the dividend
	tsthi	r1, r2				@ dividend larger: divisor a power of 2 ?
	andeq	r0, r0, r2			@ yes: remainder is dividend & (divisor - 1)
	retls	lr				@ all cases except the general one end here

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200245
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 * May clobber r1, r2, r3, ip and the flags. A zero divisor goes to Ldiv0.
 *
 * The work is done on absolute values; ip = r0 ^ r1 keeps the sign of the
 * result in its top bit.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	eor	ip, r0, r1			@ bit 31 = sign of the result
	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned values
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	.Lidiv_unit
	movs	r3, r0
	rsbmi	r3, r0, #0			@ r3 = positive dividend value
	cmp	r3, r1
	bls	.Lidiv_not_above
	tst	r1, r2				@ divisor is a power of 2 ?
	beq	.Lidiv_pow2

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the sign of the result
	ret	lr

.Lidiv_unit:
	teq	ip, r0				@ ip ^ r0 has the sign bit of the original divisor
	rsbmi	r0, r0, #0			@ divisor was -1: negate the dividend
	ret	lr

.Lidiv_not_above:				@ flags are still those of cmp r3, r1
	movlo	r0, #0				@ smaller magnitude: quotient is 0
	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 ...
	orreq	r0, r0, #1			@ ... turned into +1 or -1
	ret	lr

.Lidiv_pow2:
	ARM_DIV2_ORDER r1, r2

	mov	r0, r3, lsr r2
	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the sign of the result
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200290
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder, negated when the dividend was negative
 * May clobber r1, r2, r3, ip and the flags. A zero divisor goes to Ldiv0.
 *
 * The conditional instructions after the subs form one flag chain; their
 * order must not be changed.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned values
	movs	ip, r0				@ ip keeps the sign of the dividend
	rsbmi	r0, r0, #0			@ negative dividend: make it positive
	subs	r2, r1, #1			@ r2 = divisor - 1, Z if divisor is 1
	cmpne	r0, r1				@ otherwise compare dividend with divisor
	moveq	r0, #0				@ divisor is 1 or equals the dividend
	tsthi	r1, r2				@ dividend larger: divisor a power of 2 ?
	andeq	r0, r0, r2			@ yes: remainder is dividend & (divisor - 1)
	bls	.Lmod_apply_sign		@ only the general case falls through

	ARM_MOD_BODY r0, r1, r2, r3

.Lmod_apply_sign:
	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200316
317#ifdef CONFIG_AEABI
318
/*
 * __aeabi_uidivmod: unsigned 32-bit divide returning quotient and remainder.
 *
 * In:  r0 = numerator, r1 = denominator
 * Out: r0 = quotient, r1 = remainder
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt as
 * numerator - quotient * denominator. r2 and r3 are used as scratch.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
	bl	__aeabi_uidiv			@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = numerator, r2 = denominator
	mul	r3, r0, r2			@ r3 = quotient * denominator
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200334
/*
 * __aeabi_idivmod: signed 32-bit divide returning quotient and remainder.
 *
 * In:  r0 = numerator, r1 = denominator
 * Out: r0 = quotient, r1 = remainder
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt as
 * numerator - quotient * denominator. r2 and r3 are used as scratch.
 *
 * The routine lives in a section named after itself, like every other
 * routine in this file, rather than in the section of __aeabi_uidivmod.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr})

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands across the call
	bl	__aeabi_idiv			@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = numerator, r2 = denominator
	mul	r3, r0, r2			@ r3 = quotient * denominator
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
Marek Vasut14fefa02016-05-26 18:01:40 +0200350
351#endif
352
/*
 * Ldiv0: common landing point for division by zero.
 *
 * The division routines in this file branch here without linking, so lr
 * still holds their own return address. __div0 is called and 0 is then
 * returned in r0 through that saved address.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!			@ save lr, moving sp down by 8
	bl	__div0
	mov	r0, #0				@ About as wrong as it could be.
	ldr	pc, [sp], #8			@ return and move sp back up by 8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200367
368/* Thumb-1 specialities */
Tom Rini1c640a62017-03-18 09:01:44 -0400369#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi: table dispatch through signed byte entries.
 *
 * In: r0 = table index, lr = return address. The table is read from the
 * address obtained by clearing bit 0 of lr.
 * The signed byte at offset r0 in the table is doubled and added to lr,
 * and control returns through the adjusted lr. r0 and r1 are preserved;
 * the flags are not.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out and back in ...
	lsls	r1, r1, #1			@ ... to clear bit 0 of the address
	ldrsb	r1, [r1, r0]			@ signed byte entry for this index
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200383
/*
 * __gnu_thumb1_case_uqi: table dispatch through unsigned byte entries.
 *
 * In: r0 = table index, lr = return address. The table is read from the
 * address obtained by clearing bit 0 of lr.
 * The unsigned byte at offset r0 in the table is doubled and added to lr,
 * and control returns through the adjusted lr. r0 and r1 are preserved;
 * the flags are not.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out and back in ...
	lsls	r1, r1, #1			@ ... to clear bit 0 of the address
	ldrb	r1, [r1, r0]			@ unsigned byte entry for this index
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200397
/*
 * __gnu_thumb1_case_shi: table dispatch through signed halfword entries.
 *
 * In: r0 = table index, lr = return address. The table is read from the
 * address obtained by clearing bit 0 of lr.
 * The signed halfword at offset 2 * r0 in the table is doubled and added
 * to lr, and control returns through the adjusted lr. r0 and r1 are
 * saved and restored; the flags are not preserved.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	lsls	r0, r0, #1			@ index * 2: halfword entries
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out and back in ...
	lsls	r1, r1, #1			@ ... to clear bit 0 of the address
	ldrsh	r1, [r1, r0]			@ signed halfword entry for this index
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200412
/*
 * __gnu_thumb1_case_uhi: table dispatch through unsigned halfword entries.
 *
 * In: r0 = table index, lr = return address. The table is read from the
 * address obtained by clearing bit 0 of lr.
 * The unsigned halfword at offset 2 * r0 in the table is doubled and added
 * to lr, and control returns through the adjusted lr. r0 and r1 are
 * saved and restored; the flags are not preserved.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	lsls	r0, r0, #1			@ index * 2: halfword entries
	mov	r1, lr
	lsrs	r1, r1, #1			@ shift out and back in ...
	lsls	r1, r1, #1			@ ... to clear bit 0 of the address
	ldrh	r1, [r1, r0]			@ unsigned halfword entry for this index
	lsls	r1, r1, #1			@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
Marek Vasute92cb0b2016-05-26 18:01:45 +0200427#endif