/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0+
 */
13
14
15#include <linux/linkage.h>
16#include <asm/assembler.h>
17
/*
 * U-Boot compatibility: define an empty UNWIND() macro, since we do
 * not support stack unwinding, and define CONFIG_AEABI so that all of
 * the functions below are available without diverging from the Linux
 * code.
 */
23#ifdef __UBOOT__
24#define UNWIND(x...)
25#define CONFIG_AEABI
26#endif
27
28.macro ARM_DIV_BODY dividend, divisor, result, curbit
29
30#if __LINUX_ARM_ARCH__ >= 5
31
32 clz \curbit, \divisor
33 clz \result, \dividend
34 sub \result, \curbit, \result
35 mov \curbit, #1
36 mov \divisor, \divisor, lsl \result
37 mov \curbit, \curbit, lsl \result
38 mov \result, #0
39
40#else
41
42 @ Initially shift the divisor left 3 bits if possible,
43 @ set curbit accordingly. This allows for curbit to be located
44 @ at the left end of each 4 bit nibbles in the division loop
45 @ to save one loop in most cases.
46 tst \divisor, #0xe0000000
47 moveq \divisor, \divisor, lsl #3
48 moveq \curbit, #8
49 movne \curbit, #1
50
51 @ Unless the divisor is very big, shift it up in multiples of
52 @ four bits, since this is the amount of unwinding in the main
53 @ division loop. Continue shifting until the divisor is
54 @ larger than the dividend.
551: cmp \divisor, #0x10000000
56 cmplo \divisor, \dividend
57 movlo \divisor, \divisor, lsl #4
58 movlo \curbit, \curbit, lsl #4
59 blo 1b
60
61 @ For very big divisors, we must shift it a bit at a time, or
62 @ we will be in danger of overflowing.
631: cmp \divisor, #0x80000000
64 cmplo \divisor, \dividend
65 movlo \divisor, \divisor, lsl #1
66 movlo \curbit, \curbit, lsl #1
67 blo 1b
68
69 mov \result, #0
70
71#endif
72
73 @ Division loop
741: cmp \dividend, \divisor
75 subhs \dividend, \dividend, \divisor
76 orrhs \result, \result, \curbit
77 cmp \dividend, \divisor, lsr #1
78 subhs \dividend, \dividend, \divisor, lsr #1
79 orrhs \result, \result, \curbit, lsr #1
80 cmp \dividend, \divisor, lsr #2
81 subhs \dividend, \dividend, \divisor, lsr #2
82 orrhs \result, \result, \curbit, lsr #2
83 cmp \dividend, \divisor, lsr #3
84 subhs \dividend, \dividend, \divisor, lsr #3
85 orrhs \result, \result, \curbit, lsr #3
86 cmp \dividend, #0 @ Early termination?
Marek Vasutd0a85862016-05-26 18:01:43 +020087 movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
Marek Vasut14fefa02016-05-26 18:01:40 +020088 movne \divisor, \divisor, lsr #4
89 bne 1b
90
91.endm
92
93
94.macro ARM_DIV2_ORDER divisor, order
95
96#if __LINUX_ARM_ARCH__ >= 5
97
98 clz \order, \divisor
99 rsb \order, \order, #31
100
101#else
102
103 cmp \divisor, #(1 << 16)
104 movhs \divisor, \divisor, lsr #16
105 movhs \order, #16
106 movlo \order, #0
107
108 cmp \divisor, #(1 << 8)
109 movhs \divisor, \divisor, lsr #8
110 addhs \order, \order, #8
111
112 cmp \divisor, #(1 << 4)
113 movhs \divisor, \divisor, lsr #4
114 addhs \order, \order, #4
115
116 cmp \divisor, #(1 << 2)
117 addhi \order, \order, #3
118 addls \order, \order, \divisor, lsr #1
119
120#endif
121
122.endm
123
124
125.macro ARM_MOD_BODY dividend, divisor, order, spare
126
127#if __LINUX_ARM_ARCH__ >= 5
128
129 clz \order, \divisor
130 clz \spare, \dividend
131 sub \order, \order, \spare
132 mov \divisor, \divisor, lsl \order
133
134#else
135
136 mov \order, #0
137
138 @ Unless the divisor is very big, shift it up in multiples of
139 @ four bits, since this is the amount of unwinding in the main
140 @ division loop. Continue shifting until the divisor is
141 @ larger than the dividend.
1421: cmp \divisor, #0x10000000
143 cmplo \divisor, \dividend
144 movlo \divisor, \divisor, lsl #4
145 addlo \order, \order, #4
146 blo 1b
147
148 @ For very big divisors, we must shift it a bit at a time, or
149 @ we will be in danger of overflowing.
1501: cmp \divisor, #0x80000000
151 cmplo \divisor, \dividend
152 movlo \divisor, \divisor, lsl #1
153 addlo \order, \order, #1
154 blo 1b
155
156#endif
157
158 @ Perform all needed subtractions to keep only the reminder.
159 @ Do comparisons in batch of 4 first.
160 subs \order, \order, #3 @ yes, 3 is intended here
161 blt 2f
162
1631: cmp \dividend, \divisor
164 subhs \dividend, \dividend, \divisor
165 cmp \dividend, \divisor, lsr #1
166 subhs \dividend, \dividend, \divisor, lsr #1
167 cmp \dividend, \divisor, lsr #2
168 subhs \dividend, \dividend, \divisor, lsr #2
169 cmp \dividend, \divisor, lsr #3
170 subhs \dividend, \dividend, \divisor, lsr #3
171 cmp \dividend, #1
172 mov \divisor, \divisor, lsr #4
Marek Vasutd0a85862016-05-26 18:01:43 +0200173 subsge \order, \order, #4
Marek Vasut14fefa02016-05-26 18:01:40 +0200174 bge 1b
175
176 tst \order, #3
177 teqne \dividend, #0
178 beq 5f
179
180 @ Either 1, 2 or 3 comparison/subtractions are left.
1812: cmn \order, #2
182 blt 4f
183 beq 3f
184 cmp \dividend, \divisor
185 subhs \dividend, \dividend, \divisor
186 mov \divisor, \divisor, lsr #1
1873: cmp \dividend, \divisor
188 subhs \dividend, \dividend, \divisor
189 mov \divisor, \divisor, lsr #1
1904: cmp \dividend, \divisor
191 subhs \dividend, \dividend, \divisor
1925:
193.endm
194
195
196ENTRY(__udivsi3)
197ENTRY(__aeabi_uidiv)
198UNWIND(.fnstart)
199
200 subs r2, r1, #1
201 reteq lr
202 bcc Ldiv0
203 cmp r0, r1
204 bls 11f
205 tst r1, r2
206 beq 12f
207
208 ARM_DIV_BODY r0, r1, r2, r3
209
210 mov r0, r2
211 ret lr
212
21311: moveq r0, #1
214 movne r0, #0
215 ret lr
216
21712: ARM_DIV2_ORDER r1, r2
218
219 mov r0, r0, lsr r2
220 ret lr
221
222UNWIND(.fnend)
223ENDPROC(__udivsi3)
224ENDPROC(__aeabi_uidiv)
225
226ENTRY(__umodsi3)
227UNWIND(.fnstart)
228
229 subs r2, r1, #1 @ compare divisor with 1
230 bcc Ldiv0
231 cmpne r0, r1 @ compare dividend with divisor
232 moveq r0, #0
233 tsthi r1, r2 @ see if divisor is power of 2
234 andeq r0, r0, r2
235 retls lr
236
237 ARM_MOD_BODY r0, r1, r2, r3
238
239 ret lr
240
241UNWIND(.fnend)
242ENDPROC(__umodsi3)
243
244ENTRY(__divsi3)
245ENTRY(__aeabi_idiv)
246UNWIND(.fnstart)
247
248 cmp r1, #0
249 eor ip, r0, r1 @ save the sign of the result.
250 beq Ldiv0
251 rsbmi r1, r1, #0 @ loops below use unsigned.
252 subs r2, r1, #1 @ division by 1 or -1 ?
253 beq 10f
254 movs r3, r0
255 rsbmi r3, r0, #0 @ positive dividend value
256 cmp r3, r1
257 bls 11f
258 tst r1, r2 @ divisor is power of 2 ?
259 beq 12f
260
261 ARM_DIV_BODY r3, r1, r0, r2
262
263 cmp ip, #0
264 rsbmi r0, r0, #0
265 ret lr
266
26710: teq ip, r0 @ same sign ?
268 rsbmi r0, r0, #0
269 ret lr
270
27111: movlo r0, #0
272 moveq r0, ip, asr #31
273 orreq r0, r0, #1
274 ret lr
275
27612: ARM_DIV2_ORDER r1, r2
277
278 cmp ip, #0
279 mov r0, r3, lsr r2
280 rsbmi r0, r0, #0
281 ret lr
282
283UNWIND(.fnend)
284ENDPROC(__divsi3)
285ENDPROC(__aeabi_idiv)
286
287ENTRY(__modsi3)
288UNWIND(.fnstart)
289
290 cmp r1, #0
291 beq Ldiv0
292 rsbmi r1, r1, #0 @ loops below use unsigned.
293 movs ip, r0 @ preserve sign of dividend
294 rsbmi r0, r0, #0 @ if negative make positive
295 subs r2, r1, #1 @ compare divisor with 1
296 cmpne r0, r1 @ compare dividend with divisor
297 moveq r0, #0
298 tsthi r1, r2 @ see if divisor is power of 2
299 andeq r0, r0, r2
300 bls 10f
301
302 ARM_MOD_BODY r0, r1, r2, r3
303
30410: cmp ip, #0
305 rsbmi r0, r0, #0
306 ret lr
307
308UNWIND(.fnend)
309ENDPROC(__modsi3)
310
311#ifdef CONFIG_AEABI
312
313ENTRY(__aeabi_uidivmod)
314UNWIND(.fnstart)
315UNWIND(.save {r0, r1, ip, lr} )
316
317 stmfd sp!, {r0, r1, ip, lr}
318 bl __aeabi_uidiv
319 ldmfd sp!, {r1, r2, ip, lr}
320 mul r3, r0, r2
321 sub r1, r1, r3
322 ret lr
323
324UNWIND(.fnend)
325ENDPROC(__aeabi_uidivmod)
326
327ENTRY(__aeabi_idivmod)
328UNWIND(.fnstart)
329UNWIND(.save {r0, r1, ip, lr} )
330 stmfd sp!, {r0, r1, ip, lr}
331 bl __aeabi_idiv
332 ldmfd sp!, {r1, r2, ip, lr}
333 mul r3, r0, r2
334 sub r1, r1, r3
335 ret lr
336
337UNWIND(.fnend)
338ENDPROC(__aeabi_idivmod)
339
340#endif
341
342Ldiv0:
343UNWIND(.fnstart)
344UNWIND(.pad #4)
345UNWIND(.save {lr})
346 str lr, [sp, #-8]!
347 bl __div0
348 mov r0, #0 @ About as wrong as it could be.
349 ldr pc, [sp], #8
350UNWIND(.fnend)
351ENDPROC(Ldiv0)

/* Thumb-1 specialities */
354#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
355ENTRY(__gnu_thumb1_case_sqi)
356 push {r1}
357 mov r1, lr
358 lsrs r1, r1, #1
359 lsls r1, r1, #1
360 ldrsb r1, [r1, r0]
361 lsls r1, r1, #1
362 add lr, lr, r1
363 pop {r1}
364 bx lr
365ENDPROC(__gnu_thumb1_case_sqi)
366
367ENTRY(__gnu_thumb1_case_uqi)
368 push {r1}
369 mov r1, lr
370 lsrs r1, r1, #1
371 lsls r1, r1, #1
372 ldrb r1, [r1, r0]
373 lsls r1, r1, #1
374 add lr, lr, r1
375 pop {r1}
376 bx lr
377ENDPROC(__gnu_thumb1_case_uqi)
378
379ENTRY(__gnu_thumb1_case_shi)
380 push {r0, r1}
381 mov r1, lr
382 lsrs r1, r1, #1
383 lsls r0, r0, #1
384 lsls r1, r1, #1
385 ldrsh r1, [r1, r0]
386 lsls r1, r1, #1
387 add lr, lr, r1
388 pop {r0, r1}
389 bx lr
390ENDPROC(__gnu_thumb1_case_shi)
391
392ENTRY(__gnu_thumb1_case_uhi)
393 push {r0, r1}
394 mov r1, lr
395 lsrs r1, r1, #1
396 lsls r0, r0, #1
397 lsls r1, r1, #1
398 ldrh r1, [r1, r0]
399 lsls r1, r1, #1
400 add lr, lr, r1
401 pop {r0, r1}
402 bx lr
403ENDPROC(__gnu_thumb1_case_uhi)
404#endif