/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 */
| 7 | |
| 8 | #include <linux/linkage.h> |
| 9 | #include <asm/assembler.h> |
| 10 | |
/*
 * Register aliases used by the 64-bit division code.
 *
 * Each 64-bit value lives in a register pair, low word in _0 and high
 * word in _1:
 *
 *   A, Q = r0 + (r1 << 32)
 *   B, R = r2 + (r3 << 32)
 *
 * with A / B = Q ... R.  A and Q name the same registers, as do B and R.
 * C and D are 64-bit working values held in r4:r5 and r6:r7.
 */

/* A and B */
A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3

/* Working pairs C and D */
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

/* Q and R (same registers as A and B) */
Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

/* Extra scratch register, only defined through the THUMB() macro */
THUMB(	TMP	.req	r8	)
| 34 | |
.pushsection .text.__aeabi_uldivmod, "ax"
/*
 * __aeabi_uldivmod - unsigned 64-bit divide with remainder
 *
 * In:   A = r0 + (r1 << 32)	dividend
 *       B = r2 + (r3 << 32)	divisor
 * Out:  Q = r0 + (r1 << 32)	quotient
 *       R = r2 + (r3 << 32)	remainder
 *
 * r4-r7 (plus TMP where THUMB() emits it) and lr are pushed on entry and
 * popped, with lr loaded into pc, on every exit path.  ip is used as
 * scratch.
 *
 * Dispatch:
 *   B == 0             -> L_div_by_0:  call __div0, then return Q = R = 0
 *   B is a power of 2  -> L_pow2:      R = A & (B - 1), Q = A >> log2(B)
 *   A_1 == B_1 == 0    -> L_div_32_32: hand over to __aeabi_uidivmod
 *   otherwise          -> L_div_64_64: shift-and-subtract long division
 *
 * NOTE(review): when THUMB(TMP,) expands to nothing the push is five
 * words, so sp is presumably not 8-byte aligned at the bl sites below.
 * Confirm that __aeabi_uidivmod and __div0 tolerate this.
 */
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	@ C = B - 1 is kept; L_pow2 reuses it as the remainder mask
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	@ D_1 = shift - 32: MI selects the shift < 32 variant, PL the
	@ shift >= 32 variant
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
 ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
 THUMB(	lsrmi	TMP, B_0, ip		)
 THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
 ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
 THUMB(	lsrmi	TMP, C_0, ip		)
 THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
	@ Shift B and C left four bits at a time while the top nibble of
	@ B_1 is clear and B < A
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
	@ Then one bit at a time while the top bit of B_1 is clear and B < A
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1 (rrx brings the bit shifted out of C_1 into C_0)
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	@ What is left of A is the remainder; move it to R before Q
	@ overwrites the A registers
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_div_32_32:
	@ Note:	A_0 &	r0 are aliases
	@	Q_1	r1
	@ r0 already holds A_0; pass B_0 in r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	@ Quotient is left in r0 (Q_0); r1 is taken as the remainder
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	@ B_0 == 0, so the set bit is in B_1: the shift is 32 or more
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	@ The PL instructions run only when the shift is below 32
	movpl	A_0, A_0, lsr D_0
 ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
 THUMB(	lslpl	TMP, A_1, D_1		)
 THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Mov back C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Find log2(B) by counting the trailing zero bits of B into D_0.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If the low word of B is zero, divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the trailing zero bits of B_0: 16, 8, 4, 2, then 1.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
 ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
	@ The THUMB variant shifts into TMP so that A_1 is still intact
	@ for the Q_1 computation below (Q_1 and A_1 are both r1).
 THUMB(	lsl	TMP, A_1, D_1		)
 THUMB(	orr	Q_0, Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection