/*
 * udiv.S:      This routine was taken from glibc-1.09 and is covered
 *              by the GNU Library General Public License Version 2.
 */


/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
 * Division and remainder, from Appendix E of the Sparc Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 * Output: quotient (%o0 / %o1, unsigned) in %o0.
 *
 * m4 parameters (shown as expanded for this instantiation):
 *  .udiv	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  false	false=true => signed; false=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register usage in this instantiation:
 *  %o2		Q
 *  %o3		R
 *  %o4		ITER
 *  %o5		V
 *  %g1		1 << (32 - 4 - 1), the "large dividend" threshold; shifted
 *		left by 4 on the overflow path to form the high-order bit
 *  %g7		count of single-bit divide steps on the large-dividend path
 *
 * Registers written: %o0, %o2-%o5, %g1, %g7 and the integer condition
 * codes.  The routine uses retl and never touches the register window
 * or the stack.
 *
 * Local label naming in the unrolled loop: L.<depth>.<16 + bits>, where
 * <bits> is the signed value accumulated so far at that depth (matching
 * the "accumulated bits" comments).
 *
 * Delay-slot convention: an instruction indented by one extra space sits
 * in the delay slot of the branch directly above it.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */


	.globl .udiv
	.globl _Udiv
.udiv:
_Udiv:	/* needed for export */

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; Z set if divisor == 0
	bne	1f
	 mov	%o0, %o3		! R = dividend (executes on both paths)

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	! NOTE(review): ST_DIV0 is not defined in this file; it has to be
	! supplied by an include or by the build -- confirm.
	ta	ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2			! Q = 0 (also the result if branch taken)

	sethi	%hi(1 << (32 - 4 - 1)), %g1

	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4			! ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g7			! (re)initialise the single-step count

	sll	%o5, 4, %o5		! V <<= N

	b	1b
	 add	%o4, 1, %o4		! ITER++

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5		! V <<= 1, carry set on overflow
	bcc	Lnot_too_big
	 add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	 sub	%g7, 1, %g7		! undo the count for the overflowed shift

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b			! V < R: keep doubling V
	 nop

	be	Ldo_single_div
	 nop
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide	! no single-bit steps left
	 nop

	sub	%o3, %o5, %o3		! first step: unconditional R -= V
	mov	1, %o2			! Q = 1

	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	! Condition codes here come from the "tst %o3" in the delay slot
	! at the bottom of the loop; sll/srl leave them untouched.
	sll	%o2, 1, %o2
	bl	1f
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	 tst	%o3			! sign of R for the next step

	b,a	Lend_regular_divide	! annulled: no delay-slot instruction runs

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! V <<= N

	cmp	%o5, %o3
	bleu	1b			! repeat while V <= R
	 addcc	%o4, 1, %o4		! ITER++ (runs on loop exit as well)

	be	Lgot_result		! taken only if the addcc above set Z
	 sub	%o4, 1, %o4

	tst	%o3	! set up for initial iteration
Ldivloop:
	! One pass develops N = 4 quotient bits by non-restoring division:
	! at each depth V is halved in the branch delay slot, then R -= V if
	! R was non-negative or R += V if it was negative.  The leaf reached
	! adds the signed 4-bit digit to Q in the delay slot of "b 9f".
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4		! ITER--
	bge	Ldivloop
	 tst	%o3			! sign of R, for Ldivloop or the fixup below

	! The annul bit makes the delay slot execute only when the branch
	! is taken, i.e. only when the final remainder is negative.
	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	 sub	%o2, 1, %o2

Lgot_result:

	retl
	 mov %o2, %o0			! return Q

/*
 * .udiv_patch: the same division done with the hardware udiv instruction.
 * %y is cleared first (wr writes %g0 XOR 0); udiv executes in the delay
 * slot of retl, three instructions after the wr.
 * NOTE(review): presumably this sequence is copied over .udiv on CPUs that
 * implement udiv, and the trailing nop pads it to a fixed length -- confirm
 * against the code that applies the patch.
 */
	.globl	.udiv_patch
.udiv_patch:
	wr	%g0, 0x0, %y
	nop
	nop
	retl
	 udiv	%o0, %o1, %o0
	nop