1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 * Note: I added some stuff for use with gnupg 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, 5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 6 * 7 * This file is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Library General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or (at your 10 * option) any later version. 11 * 12 * This file is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 15 * License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this file; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, 20 * MA 02111-1307, USA. */ 21 22 #include <asm-generic/bitops/count_zeros.h> 23 24 /* You have to define the following before including this file: 25 * 26 * UWtype -- An unsigned type, default type for operations (typically a "word") 27 * UHWtype -- An unsigned type, at least half the size of UWtype. 28 * UDWtype -- An unsigned type, at least twice as large a UWtype 29 * W_TYPE_SIZE -- size in bits of UWtype 30 * 31 * SItype, USItype -- Signed and unsigned 32 bit types. 32 * DItype, UDItype -- Signed and unsigned 64 bit types. 33 * 34 * On a 32 bit machine UWtype should typically be USItype; 35 * on a 64 bit machine, UWtype should typically be UDItype. 36 */ 37 38 #define __BITS4 (W_TYPE_SIZE / 4) 39 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 40 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 41 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 42 43 /* This is used to make sure no undesirable sharing between different libraries 44 that use this file takes place. */ 45 #ifndef __MPN 46 #define __MPN(x) __##x 47 #endif 48 49 /* Define auxiliary asm macros. 50 * 51 * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two 52 * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype 53 * word product in HIGH_PROD and LOW_PROD. 54 * 55 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 56 * UDWtype product. This is just a variant of umul_ppmm. 57 58 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 59 * denominator) divides a UDWtype, composed by the UWtype integers 60 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 61 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 62 * than DENOMINATOR for correct operation. If, in addition, the most 63 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol 64 * UDIV_NEEDS_NORMALIZATION is defined to 1. 65 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 66 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient 67 * is rounded towards 0. 68 * 69 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the 70 * msb to the first non-zero bit in the UWtype X. This is the number of 71 * steps X needs to be shifted left to set the msb. Undefined for X == 0, 72 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 73 * 74 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 75 * from the least significant end. 76 * 77 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 78 * high_addend_2, low_addend_2) adds two UWtype integers, composed by 79 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 80 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 81 * (i.e. carry out) is not stored anywhere, and is lost. 82 * 83 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 84 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 85 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 86 * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 87 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 88 * and is lost. 89 * 90 * If any of these macros are left undefined for a particular CPU, 91 * C macros are used. */ 92 93 /* The CPUs come in alphabetical order below. 94 * 95 * Please add support for more CPUs here, or improve the current support 96 * for the CPUs below! */ 97 98 #if defined(__GNUC__) && !defined(NO_ASM) 99 100 /* We sometimes need to clobber "cc" with gcc2, but that would not be 101 understood by gcc1. Use cpp to avoid major code duplication. */ 102 #if __GNUC__ < 2 103 #define __CLOBBER_CC 104 #define __AND_CLOBBER_CC 105 #else /* __GNUC__ >= 2 */ 106 #define __CLOBBER_CC : "cc" 107 #define __AND_CLOBBER_CC , "cc" 108 #endif /* __GNUC__ < 2 */ 109 110 /*************************************** 111 ************** A29K ***************** 112 ***************************************/ 113 #if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 114 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 115 __asm__ ("add %1,%4,%5\n" \ 116 "addc %0,%2,%3" \ 117 : "=r" ((USItype)(sh)), \ 118 "=&r" ((USItype)(sl)) \ 119 : "%r" ((USItype)(ah)), \ 120 "rI" ((USItype)(bh)), \ 121 "%r" ((USItype)(al)), \ 122 "rI" ((USItype)(bl))) 123 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 124 __asm__ ("sub %1,%4,%5\n" \ 125 "subc %0,%2,%3" \ 126 : "=r" ((USItype)(sh)), \ 127 "=&r" ((USItype)(sl)) \ 128 : "r" ((USItype)(ah)), \ 129 "rI" ((USItype)(bh)), \ 130 "r" ((USItype)(al)), \ 131 "rI" ((USItype)(bl))) 132 #define umul_ppmm(xh, xl, m0, m1) \ 133 do { \ 134 USItype __m0 = (m0), __m1 = (m1); \ 135 __asm__ ("multiplu %0,%1,%2" \ 136 : "=r" ((USItype)(xl)) \ 137 : "r" (__m0), \ 138 "r" (__m1)); \ 139 __asm__ ("multmu %0,%1,%2" \ 140 : "=r" ((USItype)(xh)) \ 141 : "r" (__m0), \ 142 "r" (__m1)); \ 143 } while (0) 144 #define udiv_qrnnd(q, r, n1, n0, d) \ 145 __asm__ ("dividu %0,%3,%4" \ 146 : "=r" ((USItype)(q)), \ 147 "=q" ((USItype)(r)) \ 148 : "1" ((USItype)(n1)), \ 149 "r" ((USItype)(n0)), \ 150 "r" ((USItype)(d))) 151 #endif /* __a29k__ */ 152 153 #if defined(__alpha) && W_TYPE_SIZE == 64 154 #define umul_ppmm(ph, pl, m0, m1) \ 155 do { \ 156 UDItype __m0 = (m0), __m1 = (m1); \ 157 __asm__ ("umulh %r1,%2,%0" \ 158 : "=r" ((UDItype) ph) \ 159 : "%rJ" (__m0), \ 160 "rI" (__m1)); \ 161 (pl) = __m0 * __m1; \ 162 } while (0) 163 #define UMUL_TIME 46 164 #ifndef LONGLONG_STANDALONE 165 #define udiv_qrnnd(q, r, n1, n0, d) \ 166 do { UDItype __r; \ 167 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 168 (r) = __r; \ 169 } while (0) 170 extern UDItype __udiv_qrnnd(); 171 #define UDIV_TIME 220 172 #endif /* LONGLONG_STANDALONE */ 173 #endif /* __alpha */ 174 175 /*************************************** 176 ************** ARM ****************** 177 ***************************************/ 178 #if defined(__arm__) && W_TYPE_SIZE == 32 179 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 180 __asm__ ("adds %1, %4, %5\n" \ 181 "adc %0, %2, %3" \ 182 : "=r" ((USItype)(sh)), \ 183 "=&r" ((USItype)(sl)) \ 184 : "%r" ((USItype)(ah)), \ 185 "rI" ((USItype)(bh)), \ 186 "%r" ((USItype)(al)), \ 187 "rI" ((USItype)(bl))) 188 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 189 __asm__ ("subs %1, %4, %5\n" \ 190 "sbc %0, %2, %3" \ 191 : "=r" ((USItype)(sh)), \ 192 "=&r" ((USItype)(sl)) \ 193 : "r" ((USItype)(ah)), \ 194 "rI" ((USItype)(bh)), \ 195 "r" ((USItype)(al)), \ 196 "rI" ((USItype)(bl))) 197 #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ 198 #define umul_ppmm(xh, xl, a, b) \ 199 __asm__ ("%@ Inlined umul_ppmm\n" \ 200 "mov %|r0, %2, lsr #16 @ AAAA\n" \ 201 "mov %|r2, %3, lsr #16 @ BBBB\n" \ 202 "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ 203 "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ 204 "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ 205 "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ 206 "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ 207 "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ 208 "adds %|r0, %1, %0 @ central sum\n" \ 209 "addcs %|r2, %|r2, #65536\n" \ 210 "adds %1, %|r1, %|r0, lsl #16\n" \ 211 "adc %0, %|r2, %|r0, lsr #16" \ 212 : "=&r" ((USItype)(xh)), \ 213 "=r" ((USItype)(xl)) \ 214 : "r" ((USItype)(a)), \ 215 "r" ((USItype)(b)) \ 216 : "r0", "r1", "r2") 217 #else 218 #define umul_ppmm(xh, xl, a, b) \ 219 __asm__ ("%@ Inlined umul_ppmm\n" \ 220 "umull %r1, %r0, %r2, %r3" \ 221 : "=&r" ((USItype)(xh)), \ 222 "=r" ((USItype)(xl)) \ 223 : "r" ((USItype)(a)), \ 224 "r" ((USItype)(b)) \ 225 : "r0", "r1") 226 #endif 227 #define UMUL_TIME 20 228 #define UDIV_TIME 100 229 #endif /* __arm__ */ 230 231 /*************************************** 232 ************** CLIPPER ************** 233 ***************************************/ 234 #if defined(__clipper__) && W_TYPE_SIZE == 32 235 #define umul_ppmm(w1, w0, u, v) \ 236 ({union {UDItype __ll; \ 237 struct {USItype __l, __h; } __i; \ 238 } __xx; \ 239 __asm__ ("mulwux %2,%0" \ 240 : "=r" (__xx.__ll) \ 241 : "%0" ((USItype)(u)), \ 242 "r" ((USItype)(v))); \ 243 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 244 #define smul_ppmm(w1, w0, u, v) \ 245 ({union {DItype __ll; \ 246 struct {SItype __l, __h; } __i; \ 247 } __xx; \ 248 __asm__ ("mulwx %2,%0" \ 249 : "=r" (__xx.__ll) \ 250 : "%0" ((SItype)(u)), \ 251 "r" ((SItype)(v))); \ 252 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 253 #define __umulsidi3(u, v) \ 254 ({UDItype __w; \ 255 __asm__ ("mulwux %2,%0" \ 256 : "=r" (__w) \ 257 : "%0" ((USItype)(u)), \ 258 "r" ((USItype)(v))); \ 259 __w; }) 260 #endif /* __clipper__ */ 261 262 /*************************************** 263 ************** GMICRO *************** 264 ***************************************/ 265 #if defined(__gmicro__) && W_TYPE_SIZE == 32 266 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 267 __asm__ ("add.w %5,%1\n" \ 268 "addx %3,%0" \ 269 : "=g" ((USItype)(sh)), \ 270 "=&g" ((USItype)(sl)) \ 271 : "%0" ((USItype)(ah)), \ 272 "g" ((USItype)(bh)), \ 273 "%1" ((USItype)(al)), \ 274 "g" ((USItype)(bl))) 275 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 276 __asm__ ("sub.w %5,%1\n" \ 277 "subx %3,%0" \ 278 : "=g" ((USItype)(sh)), \ 279 "=&g" ((USItype)(sl)) \ 280 : "0" ((USItype)(ah)), \ 281 "g" ((USItype)(bh)), \ 282 "1" ((USItype)(al)), \ 283 "g" ((USItype)(bl))) 284 #define umul_ppmm(ph, pl, m0, m1) \ 285 __asm__ ("mulx %3,%0,%1" \ 286 : "=g" ((USItype)(ph)), \ 287 "=r" ((USItype)(pl)) \ 288 : "%0" ((USItype)(m0)), \ 289 "g" ((USItype)(m1))) 290 #define udiv_qrnnd(q, r, nh, nl, d) \ 291 __asm__ ("divx %4,%0,%1" \ 292 : "=g" ((USItype)(q)), \ 293 "=r" ((USItype)(r)) \ 294 : "1" ((USItype)(nh)), \ 295 "0" ((USItype)(nl)), \ 296 "g" ((USItype)(d))) 297 #endif 298 299 /*************************************** 300 ************** HPPA ***************** 301 ***************************************/ 302 #if defined(__hppa) && W_TYPE_SIZE == 32 303 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 304 __asm__ ("add %4,%5,%1\n" \ 305 "addc %2,%3,%0" \ 306 : "=r" ((USItype)(sh)), \ 307 "=&r" ((USItype)(sl)) \ 308 : "%rM" ((USItype)(ah)), \ 309 "rM" ((USItype)(bh)), \ 310 "%rM" ((USItype)(al)), \ 311 "rM" ((USItype)(bl))) 312 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 313 __asm__ ("sub %4,%5,%1\n" \ 314 "subb %2,%3,%0" \ 315 : "=r" ((USItype)(sh)), \ 316 "=&r" ((USItype)(sl)) \ 317 : "rM" ((USItype)(ah)), \ 318 "rM" ((USItype)(bh)), \ 319 "rM" ((USItype)(al)), \ 320 "rM" ((USItype)(bl))) 321 #if 0 && defined(_PA_RISC1_1) 322 /* xmpyu uses floating point register which is not allowed in Linux kernel. */ 323 #define umul_ppmm(wh, wl, u, v) \ 324 do { \ 325 union {UDItype __ll; \ 326 struct {USItype __h, __l; } __i; \ 327 } __xx; \ 328 __asm__ ("xmpyu %1,%2,%0" \ 329 : "=*f" (__xx.__ll) \ 330 : "*f" ((USItype)(u)), \ 331 "*f" ((USItype)(v))); \ 332 (wh) = __xx.__i.__h; \ 333 (wl) = __xx.__i.__l; \ 334 } while (0) 335 #define UMUL_TIME 8 336 #define UDIV_TIME 60 337 #else 338 #define UMUL_TIME 40 339 #define UDIV_TIME 80 340 #endif 341 #if 0 /* #ifndef LONGLONG_STANDALONE */ 342 #define udiv_qrnnd(q, r, n1, n0, d) \ 343 do { USItype __r; \ 344 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 345 (r) = __r; \ 346 } while (0) 347 extern USItype __udiv_qrnnd(); 348 #endif /* LONGLONG_STANDALONE */ 349 #endif /* hppa */ 350 351 /*************************************** 352 ************** I370 ***************** 353 ***************************************/ 354 #if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 355 #define umul_ppmm(xh, xl, m0, m1) \ 356 do { \ 357 union {UDItype __ll; \ 358 struct {USItype __h, __l; } __i; \ 359 } __xx; \ 360 USItype __m0 = (m0), __m1 = (m1); \ 361 __asm__ ("mr %0,%3" \ 362 : "=r" (__xx.__i.__h), \ 363 "=r" (__xx.__i.__l) \ 364 : "%1" (__m0), \ 365 "r" (__m1)); \ 366 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 367 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 368 + (((SItype) __m1 >> 31) & __m0)); \ 369 } while (0) 370 #define smul_ppmm(xh, xl, m0, m1) \ 371 do { \ 372 union {DItype __ll; \ 373 struct {USItype __h, __l; } __i; \ 374 } __xx; \ 375 __asm__ ("mr %0,%3" \ 376 : "=r" (__xx.__i.__h), \ 377 "=r" (__xx.__i.__l) \ 378 : "%1" (m0), \ 379 "r" (m1)); \ 380 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 381 } while (0) 382 #define sdiv_qrnnd(q, r, n1, n0, d) \ 383 do { \ 384 union {DItype __ll; \ 385 struct {USItype __h, __l; } __i; \ 386 } __xx; \ 387 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 388 __asm__ ("dr %0,%2" \ 389 : "=r" (__xx.__ll) \ 390 : "0" (__xx.__ll), "r" (d)); \ 391 (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 392 } while (0) 393 #endif 394 395 /*************************************** 396 ************** I386 ***************** 397 ***************************************/ 398 #undef __i386__ 399 #if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 400 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 401 __asm__ ("addl %5,%1\n" \ 402 "adcl %3,%0" \ 403 : "=r" ((USItype)(sh)), \ 404 "=&r" ((USItype)(sl)) \ 405 : "%0" ((USItype)(ah)), \ 406 "g" ((USItype)(bh)), \ 407 "%1" ((USItype)(al)), \ 408 "g" ((USItype)(bl))) 409 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 410 __asm__ ("subl %5,%1\n" \ 411 "sbbl %3,%0" \ 412 : "=r" ((USItype)(sh)), \ 413 "=&r" ((USItype)(sl)) \ 414 : "0" ((USItype)(ah)), \ 415 "g" ((USItype)(bh)), \ 416 "1" ((USItype)(al)), \ 417 "g" ((USItype)(bl))) 418 #define umul_ppmm(w1, w0, u, v) \ 419 __asm__ ("mull %3" \ 420 : "=a" ((USItype)(w0)), \ 421 "=d" ((USItype)(w1)) \ 422 : "%0" ((USItype)(u)), \ 423 "rm" ((USItype)(v))) 424 #define udiv_qrnnd(q, r, n1, n0, d) \ 425 __asm__ ("divl %4" \ 426 : "=a" ((USItype)(q)), \ 427 "=d" ((USItype)(r)) \ 428 : "0" ((USItype)(n0)), \ 429 "1" ((USItype)(n1)), \ 430 "rm" ((USItype)(d))) 431 #ifndef UMUL_TIME 432 #define UMUL_TIME 40 433 #endif 434 #ifndef UDIV_TIME 435 #define UDIV_TIME 40 436 #endif 437 #endif /* 80x86 */ 438 439 /*************************************** 440 ************** I860 ***************** 441 ***************************************/ 442 #if defined(__i860__) && W_TYPE_SIZE == 32 443 #define rshift_rhlc(r, h, l, c) \ 444 __asm__ ("shr %3,r0,r0\n" \ 445 "shrd %1,%2,%0" \ 446 "=r" (r) : "r" (h), "r" (l), "rn" (c)) 447 #endif /* i860 */ 448 449 /*************************************** 450 ************** I960 ***************** 451 ***************************************/ 452 #if defined(__i960__) && W_TYPE_SIZE == 32 453 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 454 __asm__ ("cmpo 1,0\n" \ 455 "addc %5,%4,%1\n" \ 456 "addc %3,%2,%0" \ 457 : "=r" ((USItype)(sh)), \ 458 "=&r" ((USItype)(sl)) \ 459 : "%dI" ((USItype)(ah)), \ 460 "dI" ((USItype)(bh)), \ 461 "%dI" ((USItype)(al)), \ 462 "dI" ((USItype)(bl))) 463 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 464 __asm__ ("cmpo 0,0\n" \ 465 "subc %5,%4,%1\n" \ 466 "subc %3,%2,%0" \ 467 : "=r" ((USItype)(sh)), \ 468 "=&r" ((USItype)(sl)) \ 469 : "dI" ((USItype)(ah)), \ 470 "dI" ((USItype)(bh)), \ 471 "dI" ((USItype)(al)), \ 472 "dI" ((USItype)(bl))) 473 #define umul_ppmm(w1, w0, u, v) \ 474 ({union {UDItype __ll; \ 475 struct {USItype __l, __h; } __i; \ 476 } __xx; \ 477 __asm__ ("emul %2,%1,%0" \ 478 : "=d" (__xx.__ll) \ 479 : "%dI" ((USItype)(u)), \ 480 "dI" ((USItype)(v))); \ 481 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 482 #define __umulsidi3(u, v) \ 483 ({UDItype __w; \ 484 __asm__ ("emul %2,%1,%0" \ 485 : "=d" (__w) \ 486 : "%dI" ((USItype)(u)), \ 487 "dI" ((USItype)(v))); \ 488 __w; }) 489 #define udiv_qrnnd(q, r, nh, nl, d) \ 490 do { \ 491 union {UDItype __ll; \ 492 struct {USItype __l, __h; } __i; \ 493 } __nn; \ 494 __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ 495 __asm__ ("ediv %d,%n,%0" \ 496 : "=d" (__rq.__ll) \ 497 : "dI" (__nn.__ll), \ 498 "dI" ((USItype)(d))); \ 499 (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ 500 } while (0) 501 #if defined(__i960mx) /* what is the proper symbol to test??? */ 502 #define rshift_rhlc(r, h, l, c) \ 503 do { \ 504 union {UDItype __ll; \ 505 struct {USItype __l, __h; } __i; \ 506 } __nn; \ 507 __nn.__i.__h = (h); __nn.__i.__l = (l); \ 508 __asm__ ("shre %2,%1,%0" \ 509 : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ 510 } 511 #endif /* i960mx */ 512 #endif /* i960 */ 513 514 /*************************************** 515 ************** 68000 **************** 516 ***************************************/ 517 #if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 518 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 519 __asm__ ("add%.l %5,%1\n" \ 520 "addx%.l %3,%0" \ 521 : "=d" ((USItype)(sh)), \ 522 "=&d" ((USItype)(sl)) \ 523 : "%0" ((USItype)(ah)), \ 524 "d" ((USItype)(bh)), \ 525 "%1" ((USItype)(al)), \ 526 "g" ((USItype)(bl))) 527 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 528 __asm__ ("sub%.l %5,%1\n" \ 529 "subx%.l %3,%0" \ 530 : "=d" ((USItype)(sh)), \ 531 "=&d" ((USItype)(sl)) \ 532 : "0" ((USItype)(ah)), \ 533 "d" ((USItype)(bh)), \ 534 "1" ((USItype)(al)), \ 535 "g" ((USItype)(bl))) 536 #if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) 537 #define umul_ppmm(w1, w0, u, v) \ 538 __asm__ ("mulu%.l %3,%1:%0" \ 539 : "=d" ((USItype)(w0)), \ 540 "=d" ((USItype)(w1)) \ 541 : "%0" ((USItype)(u)), \ 542 "dmi" ((USItype)(v))) 543 #define UMUL_TIME 45 544 #define udiv_qrnnd(q, r, n1, n0, d) \ 545 __asm__ ("divu%.l %4,%1:%0" \ 546 : "=d" ((USItype)(q)), \ 547 "=d" ((USItype)(r)) \ 548 : "0" ((USItype)(n0)), \ 549 "1" ((USItype)(n1)), \ 550 "dmi" ((USItype)(d))) 551 #define UDIV_TIME 90 552 #define sdiv_qrnnd(q, r, n1, n0, d) \ 553 __asm__ ("divs%.l %4,%1:%0" \ 554 : "=d" ((USItype)(q)), \ 555 "=d" ((USItype)(r)) \ 556 : "0" ((USItype)(n0)), \ 557 "1" ((USItype)(n1)), \ 558 "dmi" ((USItype)(d))) 559 #else /* not mc68020 */ 560 #define umul_ppmm(xh, xl, a, b) \ 561 do { USItype __umul_tmp1, __umul_tmp2; \ 562 __asm__ ("| Inlined umul_ppmm\n" \ 563 "move%.l %5,%3\n" \ 564 "move%.l %2,%0\n" \ 565 "move%.w %3,%1\n" \ 566 "swap %3\n" \ 567 "swap %0\n" \ 568 "mulu %2,%1\n" \ 569 "mulu %3,%0\n" \ 570 "mulu %2,%3\n" \ 571 "swap %2\n" \ 572 "mulu %5,%2\n" \ 573 "add%.l %3,%2\n" \ 574 "jcc 1f\n" \ 575 "add%.l %#0x10000,%0\n" \ 576 "1: move%.l %2,%3\n" \ 577 "clr%.w %2\n" \ 578 "swap %2\n" \ 579 "swap %3\n" \ 580 "clr%.w %3\n" \ 581 "add%.l %3,%1\n" \ 582 "addx%.l %2,%0\n" \ 583 "| End inlined umul_ppmm" \ 584 : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 585 "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 586 : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 587 } while (0) 588 #define UMUL_TIME 100 589 #define UDIV_TIME 400 590 #endif /* not mc68020 */ 591 #endif /* mc68000 */ 592 593 /*************************************** 594 ************** 88000 **************** 595 ***************************************/ 596 #if defined(__m88000__) && W_TYPE_SIZE == 32 597 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 598 __asm__ ("addu.co %1,%r4,%r5\n" \ 599 "addu.ci %0,%r2,%r3" \ 600 : "=r" ((USItype)(sh)), \ 601 "=&r" ((USItype)(sl)) \ 602 : "%rJ" ((USItype)(ah)), \ 603 "rJ" ((USItype)(bh)), \ 604 "%rJ" ((USItype)(al)), \ 605 "rJ" ((USItype)(bl))) 606 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 607 __asm__ ("subu.co %1,%r4,%r5\n" \ 608 "subu.ci %0,%r2,%r3" \ 609 : "=r" ((USItype)(sh)), \ 610 "=&r" ((USItype)(sl)) \ 611 : "rJ" ((USItype)(ah)), \ 612 "rJ" ((USItype)(bh)), \ 613 "rJ" ((USItype)(al)), \ 614 "rJ" ((USItype)(bl))) 615 #if defined(__m88110__) 616 #define umul_ppmm(wh, wl, u, v) \ 617 do { \ 618 union {UDItype __ll; \ 619 struct {USItype __h, __l; } __i; \ 620 } __x; \ 621 __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ 622 (wh) = __x.__i.__h; \ 623 (wl) = __x.__i.__l; \ 624 } while (0) 625 #define udiv_qrnnd(q, r, n1, n0, d) \ 626 ({union {UDItype __ll; \ 627 struct {USItype __h, __l; } __i; \ 628 } __x, __q; \ 629 __x.__i.__h = (n1); __x.__i.__l = (n0); \ 630 __asm__ ("divu.d %0,%1,%2" \ 631 : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ 632 (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) 633 #define UMUL_TIME 5 634 #define UDIV_TIME 25 635 #else 636 #define UMUL_TIME 17 637 #define UDIV_TIME 150 638 #endif /* __m88110__ */ 639 #endif /* __m88000__ */ 640 641 /*************************************** 642 ************** MIPS ***************** 643 ***************************************/ 644 #if defined(__mips__) && W_TYPE_SIZE == 32 645 #if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 646 #define umul_ppmm(w1, w0, u, v) \ 647 do { \ 648 UDItype __ll = (UDItype)(u) * (v); \ 649 w1 = __ll >> 32; \ 650 w0 = __ll; \ 651 } while (0) 652 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 653 #define umul_ppmm(w1, w0, u, v) \ 654 __asm__ ("multu %2,%3" \ 655 : "=l" ((USItype)(w0)), \ 656 "=h" ((USItype)(w1)) \ 657 : "d" ((USItype)(u)), \ 658 "d" ((USItype)(v))) 659 #else 660 #define umul_ppmm(w1, w0, u, v) \ 661 __asm__ ("multu %2,%3\n" \ 662 "mflo %0\n" \ 663 "mfhi %1" \ 664 : "=d" ((USItype)(w0)), \ 665 "=d" ((USItype)(w1)) \ 666 : "d" ((USItype)(u)), \ 667 "d" ((USItype)(v))) 668 #endif 669 #define UMUL_TIME 10 670 #define UDIV_TIME 100 671 #endif /* __mips__ */ 672 673 /*************************************** 674 ************** MIPS/64 ************** 675 ***************************************/ 676 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 677 #if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 678 #define umul_ppmm(w1, w0, u, v) \ 679 do { \ 680 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ 681 __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ 682 w1 = __ll >> 64; \ 683 w0 = __ll; \ 684 } while (0) 685 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 686 #define umul_ppmm(w1, w0, u, v) \ 687 __asm__ ("dmultu %2,%3" \ 688 : "=l" ((UDItype)(w0)), \ 689 "=h" ((UDItype)(w1)) \ 690 : "d" ((UDItype)(u)), \ 691 "d" ((UDItype)(v))) 692 #else 693 #define umul_ppmm(w1, w0, u, v) \ 694 __asm__ ("dmultu %2,%3\n" \ 695 "mflo %0\n" \ 696 "mfhi %1" \ 697 : "=d" ((UDItype)(w0)), \ 698 "=d" ((UDItype)(w1)) \ 699 : "d" ((UDItype)(u)), \ 700 "d" ((UDItype)(v))) 701 #endif 702 #define UMUL_TIME 20 703 #define UDIV_TIME 140 704 #endif /* __mips__ */ 705 706 /*************************************** 707 ************** 32000 **************** 708 ***************************************/ 709 #if defined(__ns32000__) && W_TYPE_SIZE == 32 710 #define umul_ppmm(w1, w0, u, v) \ 711 ({union {UDItype __ll; \ 712 struct {USItype __l, __h; } __i; \ 713 } __xx; \ 714 __asm__ ("meid %2,%0" \ 715 : "=g" (__xx.__ll) \ 716 : "%0" ((USItype)(u)), \ 717 "g" ((USItype)(v))); \ 718 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 719 #define __umulsidi3(u, v) \ 720 ({UDItype __w; \ 721 __asm__ ("meid %2,%0" \ 722 : "=g" (__w) \ 723 : "%0" ((USItype)(u)), \ 724 "g" ((USItype)(v))); \ 725 __w; }) 726 #define udiv_qrnnd(q, r, n1, n0, d) \ 727 ({union {UDItype __ll; \ 728 struct {USItype __l, __h; } __i; \ 729 } __xx; \ 730 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 731 __asm__ ("deid %2,%0" \ 732 : "=g" (__xx.__ll) \ 733 : "0" (__xx.__ll), \ 734 "g" ((USItype)(d))); \ 735 (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 736 #endif /* __ns32000__ */ 737 738 /*************************************** 739 ************** PPC ****************** 740 ***************************************/ 741 #if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 742 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 743 do { \ 744 if (__builtin_constant_p(bh) && (bh) == 0) \ 745 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 746 : "=r" ((USItype)(sh)), \ 747 "=&r" ((USItype)(sl)) \ 748 : "%r" ((USItype)(ah)), \ 749 "%r" ((USItype)(al)), \ 750 "rI" ((USItype)(bl))); \ 751 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 752 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 753 : "=r" ((USItype)(sh)), \ 754 "=&r" ((USItype)(sl)) \ 755 : "%r" ((USItype)(ah)), \ 756 "%r" ((USItype)(al)), \ 757 "rI" ((USItype)(bl))); \ 758 else \ 759 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 760 : "=r" ((USItype)(sh)), \ 761 "=&r" ((USItype)(sl)) \ 762 : "%r" ((USItype)(ah)), \ 763 "r" ((USItype)(bh)), \ 764 "%r" ((USItype)(al)), \ 765 "rI" ((USItype)(bl))); \ 766 } while (0) 767 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 768 do { \ 769 if (__builtin_constant_p(ah) && (ah) == 0) \ 770 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 771 : "=r" ((USItype)(sh)), \ 772 "=&r" ((USItype)(sl)) \ 773 : "r" ((USItype)(bh)), \ 774 "rI" ((USItype)(al)), \ 775 "r" ((USItype)(bl))); \ 776 else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ 777 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 778 : "=r" ((USItype)(sh)), \ 779 "=&r" ((USItype)(sl)) \ 780 : "r" ((USItype)(bh)), \ 781 "rI" ((USItype)(al)), \ 782 "r" ((USItype)(bl))); \ 783 else if (__builtin_constant_p(bh) && (bh) == 0) \ 784 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 785 : "=r" ((USItype)(sh)), \ 786 "=&r" ((USItype)(sl)) \ 787 : "r" ((USItype)(ah)), \ 788 "rI" ((USItype)(al)), \ 789 "r" ((USItype)(bl))); \ 790 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 791 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 792 : "=r" ((USItype)(sh)), \ 793 "=&r" ((USItype)(sl)) \ 794 : "r" ((USItype)(ah)), \ 795 "rI" ((USItype)(al)), \ 796 "r" ((USItype)(bl))); \ 797 else \ 798 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 799 : "=r" ((USItype)(sh)), \ 800 "=&r" ((USItype)(sl)) \ 801 : "r" ((USItype)(ah)), \ 802 "r" ((USItype)(bh)), \ 803 "rI" ((USItype)(al)), \ 804 "r" ((USItype)(bl))); \ 805 } while (0) 806 #if defined(_ARCH_PPC) 807 #define umul_ppmm(ph, pl, m0, m1) \ 808 do { \ 809 USItype __m0 = (m0), __m1 = (m1); \ 810 __asm__ ("mulhwu %0,%1,%2" \ 811 : "=r" ((USItype) ph) \ 812 : "%r" (__m0), \ 813 "r" (__m1)); \ 814 (pl) = __m0 * __m1; \ 815 } while (0) 816 #define UMUL_TIME 15 817 #define smul_ppmm(ph, pl, m0, m1) \ 818 do { \ 819 SItype __m0 = (m0), __m1 = (m1); \ 820 __asm__ ("mulhw %0,%1,%2" \ 821 : "=r" ((SItype) ph) \ 822 : "%r" (__m0), \ 823 "r" (__m1)); \ 824 (pl) = __m0 * __m1; \ 825 } while (0) 826 #define SMUL_TIME 14 827 #define UDIV_TIME 120 828 #else 829 #define umul_ppmm(xh, xl, m0, m1) \ 830 do { \ 831 USItype __m0 = (m0), __m1 = (m1); \ 832 __asm__ ("mul %0,%2,%3" \ 833 : "=r" ((USItype)(xh)), \ 834 "=q" ((USItype)(xl)) \ 835 : "r" (__m0), \ 836 "r" (__m1)); \ 837 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 838 + (((SItype) __m1 >> 31) & __m0)); \ 839 } while (0) 840 #define UMUL_TIME 8 841 #define smul_ppmm(xh, xl, m0, m1) \ 842 __asm__ ("mul %0,%2,%3" \ 843 : "=r" ((SItype)(xh)), \ 844 "=q" ((SItype)(xl)) \ 845 : "r" (m0), \ 846 "r" (m1)) 847 #define SMUL_TIME 4 848 #define sdiv_qrnnd(q, r, nh, nl, d) \ 849 __asm__ ("div %0,%2,%4" \ 850 : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 851 : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 852 #define UDIV_TIME 100 853 #endif 854 #endif /* Power architecture variants. */ 855 856 /*************************************** 857 ************** PYR ****************** 858 ***************************************/ 859 #if defined(__pyr__) && W_TYPE_SIZE == 32 860 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 861 __asm__ ("addw %5,%1\n" \ 862 "addwc %3,%0" \ 863 : "=r" ((USItype)(sh)), \ 864 "=&r" ((USItype)(sl)) \ 865 : "%0" ((USItype)(ah)), \ 866 "g" ((USItype)(bh)), \ 867 "%1" ((USItype)(al)), \ 868 "g" ((USItype)(bl))) 869 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 870 __asm__ ("subw %5,%1\n" \ 871 "subwb %3,%0" \ 872 : "=r" ((USItype)(sh)), \ 873 "=&r" ((USItype)(sl)) \ 874 : "0" ((USItype)(ah)), \ 875 "g" ((USItype)(bh)), \ 876 "1" ((USItype)(al)), \ 877 "g" ((USItype)(bl))) 878 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ 879 #define umul_ppmm(w1, w0, u, v) \ 880 ({union {UDItype __ll; \ 881 struct {USItype __h, __l; } __i; \ 882 } __xx; \ 883 __asm__ ("movw %1,%R0\n" \ 884 "uemul %2,%0" \ 885 : "=&r" (__xx.__ll) \ 886 : "g" ((USItype) (u)), \ 887 "g" ((USItype)(v))); \ 888 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 889 #endif /* __pyr__ */ 890 891 /*************************************** 892 ************** RT/ROMP ************** 893 ***************************************/ 894 #if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 895 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 896 __asm__ ("a %1,%5\n" \ 897 "ae %0,%3" \ 898 : "=r" ((USItype)(sh)), \ 899 "=&r" ((USItype)(sl)) \ 900 : "%0" ((USItype)(ah)), \ 901 "r" ((USItype)(bh)), \ 902 "%1" ((USItype)(al)), \ 903 "r" ((USItype)(bl))) 904 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 905 __asm__ ("s %1,%5\n" \ 906 "se %0,%3" \ 907 : "=r" ((USItype)(sh)), \ 908 "=&r" ((USItype)(sl)) \ 909 : "0" ((USItype)(ah)), \ 910 "r" ((USItype)(bh)), \ 911 "1" ((USItype)(al)), \ 912 "r" ((USItype)(bl))) 913 #define umul_ppmm(ph, pl, m0, m1) \ 914 do { \ 915 USItype __m0 = (m0), __m1 = (m1); \ 916 __asm__ ( \ 917 "s r2,r2\n" \ 918 "mts r10,%2\n" \ 919 "m r2,%3\n" \ 920 "m r2,%3\n" \ 921 "m r2,%3\n" \ 922 "m r2,%3\n" \ 923 "m r2,%3\n" \ 924 "m r2,%3\n" \ 925 "m r2,%3\n" \ 926 "m r2,%3\n" \ 927 "m r2,%3\n" \ 928 "m r2,%3\n" \ 929 "m r2,%3\n" \ 930 "m r2,%3\n" \ 931 "m r2,%3\n" \ 932 "m r2,%3\n" \ 933 "m r2,%3\n" \ 934 "m r2,%3\n" \ 935 "cas %0,r2,r0\n" \ 936 "mfs r10,%1" \ 937 : "=r" ((USItype)(ph)), \ 938 "=r" ((USItype)(pl)) \ 939 : "%r" (__m0), \ 940 "r" (__m1) \ 941 : "r2"); \ 942 (ph) += ((((SItype) __m0 >> 31) & __m1) \ 943 + (((SItype) __m1 >> 31) & __m0)); \ 944 } while (0) 945 #define UMUL_TIME 20 946 #define UDIV_TIME 200 947 #endif /* RT/ROMP */ 948 949 /*************************************** 950 ************** SH2 ****************** 951 ***************************************/ 952 #if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ 953 && W_TYPE_SIZE == 32 954 #define umul_ppmm(w1, w0, u, v) \ 955 __asm__ ( \ 956 "dmulu.l %2,%3\n" \ 957 "sts macl,%1\n" \ 958 "sts mach,%0" \ 959 : "=r" ((USItype)(w1)), \ 960 "=r" ((USItype)(w0)) \ 961 : "r" ((USItype)(u)), \ 962 "r" ((USItype)(v)) \ 963 : "macl", "mach") 964 #define UMUL_TIME 5 965 #endif 966 967 /*************************************** 968 ************** SPARC **************** 969 ***************************************/ 970 #if defined(__sparc__) && W_TYPE_SIZE == 32 971 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 972 __asm__ ("addcc %r4,%5,%1\n" \ 973 "addx %r2,%3,%0" \ 974 : "=r" ((USItype)(sh)), \ 975 "=&r" ((USItype)(sl)) \ 976 : "%rJ" ((USItype)(ah)), \ 977 "rI" ((USItype)(bh)), \ 978 "%rJ" ((USItype)(al)), \ 979 "rI" ((USItype)(bl)) \ 980 __CLOBBER_CC) 981 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 982 __asm__ ("subcc %r4,%5,%1\n" \ 983 "subx %r2,%3,%0" \ 984 : "=r" ((USItype)(sh)), \ 985 "=&r" ((USItype)(sl)) \ 986 : "rJ" ((USItype)(ah)), \ 987 "rI" ((USItype)(bh)), \ 988 "rJ" ((USItype)(al)), \ 989 "rI" ((USItype)(bl)) \ 990 __CLOBBER_CC) 991 #if defined(__sparc_v8__) 992 /* Don't match immediate range because, 1) it is not often useful, 993 2) the 'I' flag thinks of the range as a 13 bit signed interval, 994 while we want to match a 13 bit interval, sign extended to 32 bits, 995 but INTERPRETED AS UNSIGNED. */ 996 #define umul_ppmm(w1, w0, u, v) \ 997 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 998 : "=r" ((USItype)(w1)), \ 999 "=r" ((USItype)(w0)) \ 1000 : "r" ((USItype)(u)), \ 1001 "r" ((USItype)(v))) 1002 #define UMUL_TIME 5 1003 #ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ 1004 #define udiv_qrnnd(q, r, n1, n0, d) \ 1005 do { \ 1006 USItype __q; \ 1007 __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ 1008 : "=r" ((USItype)(__q)) \ 1009 : "r" ((USItype)(n1)), \ 1010 "r" ((USItype)(n0)), \ 1011 "r" ((USItype)(d))); \ 1012 (r) = (n0) - __q * (d); \ 1013 (q) = __q; \ 1014 } while (0) 1015 #define UDIV_TIME 25 1016 #endif /* SUPERSPARC */ 1017 #else /* ! __sparc_v8__ */ 1018 #if defined(__sparclite__) 1019 /* This has hardware multiply but not divide. It also has two additional 1020 instructions scan (ffs from high bit) and divscc. */ 1021 #define umul_ppmm(w1, w0, u, v) \ 1022 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1023 : "=r" ((USItype)(w1)), \ 1024 "=r" ((USItype)(w0)) \ 1025 : "r" ((USItype)(u)), \ 1026 "r" ((USItype)(v))) 1027 #define UMUL_TIME 5 1028 #define udiv_qrnnd(q, r, n1, n0, d) \ 1029 __asm__ ("! Inlined udiv_qrnnd\n" \ 1030 "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ 1031 "tst %%g0\n" \ 1032 "divscc %3,%4,%%g1\n" \ 1033 "divscc %%g1,%4,%%g1\n" \ 1034 "divscc %%g1,%4,%%g1\n" \ 1035 "divscc %%g1,%4,%%g1\n" \ 1036 "divscc %%g1,%4,%%g1\n" \ 1037 "divscc %%g1,%4,%%g1\n" \ 1038 "divscc %%g1,%4,%%g1\n" \ 1039 "divscc %%g1,%4,%%g1\n" \ 1040 "divscc %%g1,%4,%%g1\n" \ 1041 "divscc %%g1,%4,%%g1\n" \ 1042 "divscc %%g1,%4,%%g1\n" \ 1043 "divscc %%g1,%4,%%g1\n" \ 1044 "divscc %%g1,%4,%%g1\n" \ 1045 "divscc %%g1,%4,%%g1\n" \ 1046 "divscc %%g1,%4,%%g1\n" \ 1047 "divscc %%g1,%4,%%g1\n" \ 1048 "divscc %%g1,%4,%%g1\n" \ 1049 "divscc %%g1,%4,%%g1\n" \ 1050 "divscc %%g1,%4,%%g1\n" \ 1051 "divscc %%g1,%4,%%g1\n" \ 1052 "divscc %%g1,%4,%%g1\n" \ 1053 "divscc %%g1,%4,%%g1\n" \ 1054 "divscc %%g1,%4,%%g1\n" \ 1055 "divscc %%g1,%4,%%g1\n" \ 1056 "divscc %%g1,%4,%%g1\n" \ 1057 "divscc %%g1,%4,%%g1\n" \ 1058 "divscc %%g1,%4,%%g1\n" \ 1059 "divscc %%g1,%4,%%g1\n" \ 1060 "divscc %%g1,%4,%%g1\n" \ 1061 "divscc %%g1,%4,%%g1\n" \ 1062 "divscc %%g1,%4,%%g1\n" \ 1063 "divscc %%g1,%4,%0\n" \ 1064 "rd %%y,%1\n" \ 1065 "bl,a 1f\n" \ 1066 "add %1,%4,%1\n" \ 1067 "1: ! End of inline udiv_qrnnd" \ 1068 : "=r" ((USItype)(q)), \ 1069 "=r" ((USItype)(r)) \ 1070 : "r" ((USItype)(n1)), \ 1071 "r" ((USItype)(n0)), \ 1072 "rI" ((USItype)(d)) \ 1073 : "%g1" __AND_CLOBBER_CC) 1074 #define UDIV_TIME 37 1075 #endif /* __sparclite__ */ 1076 #endif /* __sparc_v8__ */ 1077 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 1078 #ifndef umul_ppmm 1079 #define umul_ppmm(w1, w0, u, v) \ 1080 __asm__ ("! Inlined umul_ppmm\n" \ 1081 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ 1082 "sra %3,31,%%g2 ! Don't move this insn\n" \ 1083 "and %2,%%g2,%%g2 ! Don't move this insn\n" \ 1084 "andcc %%g0,0,%%g1 ! Don't move this insn\n" \ 1085 "mulscc %%g1,%3,%%g1\n" \ 1086 "mulscc %%g1,%3,%%g1\n" \ 1087 "mulscc %%g1,%3,%%g1\n" \ 1088 "mulscc %%g1,%3,%%g1\n" \ 1089 "mulscc %%g1,%3,%%g1\n" \ 1090 "mulscc %%g1,%3,%%g1\n" \ 1091 "mulscc %%g1,%3,%%g1\n" \ 1092 "mulscc %%g1,%3,%%g1\n" \ 1093 "mulscc %%g1,%3,%%g1\n" \ 1094 "mulscc %%g1,%3,%%g1\n" \ 1095 "mulscc %%g1,%3,%%g1\n" \ 1096 "mulscc %%g1,%3,%%g1\n" \ 1097 "mulscc %%g1,%3,%%g1\n" \ 1098 "mulscc %%g1,%3,%%g1\n" \ 1099 "mulscc %%g1,%3,%%g1\n" \ 1100 "mulscc %%g1,%3,%%g1\n" \ 1101 "mulscc %%g1,%3,%%g1\n" \ 1102 "mulscc %%g1,%3,%%g1\n" \ 1103 "mulscc %%g1,%3,%%g1\n" \ 1104 "mulscc %%g1,%3,%%g1\n" \ 1105 "mulscc %%g1,%3,%%g1\n" \ 1106 "mulscc %%g1,%3,%%g1\n" \ 1107 "mulscc %%g1,%3,%%g1\n" \ 1108 "mulscc %%g1,%3,%%g1\n" \ 1109 "mulscc %%g1,%3,%%g1\n" \ 1110 "mulscc %%g1,%3,%%g1\n" \ 1111 "mulscc %%g1,%3,%%g1\n" \ 1112 "mulscc %%g1,%3,%%g1\n" \ 1113 "mulscc %%g1,%3,%%g1\n" \ 1114 "mulscc %%g1,%3,%%g1\n" \ 1115 "mulscc %%g1,%3,%%g1\n" \ 1116 "mulscc %%g1,%3,%%g1\n" \ 1117 "mulscc %%g1,0,%%g1\n" \ 1118 "add %%g1,%%g2,%0\n" \ 1119 "rd %%y,%1" \ 1120 : "=r" ((USItype)(w1)), \ 1121 "=r" ((USItype)(w0)) \ 1122 : "%rI" ((USItype)(u)), \ 1123 "r" ((USItype)(v)) \ 1124 : "%g1", "%g2" __AND_CLOBBER_CC) 1125 #define UMUL_TIME 39 /* 39 instructions */ 1126 /* It's quite necessary to add this much assembler for the sparc. 1127 The default udiv_qrnnd (in C) is more than 10 times slower! */ 1128 #define udiv_qrnnd(q, r, n1, n0, d) \ 1129 __asm__ ("! Inlined udiv_qrnnd\n\t" \ 1130 "mov 32,%%g1\n\t" \ 1131 "subcc %1,%2,%%g0\n\t" \ 1132 "1: bcs 5f\n\t" \ 1133 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1134 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1135 "addx %1,%1,%1 ! so this can't give carry\n\t" \ 1136 "subcc %%g1,1,%%g1\n\t" \ 1137 "2: bne 1b\n\t" \ 1138 "subcc %1,%2,%%g0\n\t" \ 1139 "bcs 3f\n\t" \ 1140 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1141 "b 3f\n\t" \ 1142 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1143 "4: sub %1,%2,%1\n\t" \ 1144 "5: addxcc %1,%1,%1\n\t" \ 1145 "bcc 2b\n\t" \ 1146 "subcc %%g1,1,%%g1\n\t" \ 1147 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ 1148 "bne 4b\n\t" \ 1149 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ 1150 "sub %1,%2,%1\n\t" \ 1151 "3: xnor %0,0,%0\n\t" \ 1152 "! End of inline udiv_qrnnd\n" \ 1153 : "=&r" ((USItype)(q)), \ 1154 "=&r" ((USItype)(r)) \ 1155 : "r" ((USItype)(d)), \ 1156 "1" ((USItype)(n1)), \ 1157 "0" ((USItype)(n0)) : "%g1", "cc") 1158 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ 1159 #endif 1160 #endif /* __sparc__ */ 1161 1162 /*************************************** 1163 ************** VAX ****************** 1164 ***************************************/ 1165 #if defined(__vax__) && W_TYPE_SIZE == 32 1166 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1167 __asm__ ("addl2 %5,%1\n" \ 1168 "adwc %3,%0" \ 1169 : "=g" ((USItype)(sh)), \ 1170 "=&g" ((USItype)(sl)) \ 1171 : "%0" ((USItype)(ah)), \ 1172 "g" ((USItype)(bh)), \ 1173 "%1" ((USItype)(al)), \ 1174 "g" ((USItype)(bl))) 1175 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1176 __asm__ ("subl2 %5,%1\n" \ 1177 "sbwc %3,%0" \ 1178 : "=g" ((USItype)(sh)), \ 1179 "=&g" ((USItype)(sl)) \ 1180 : "0" ((USItype)(ah)), \ 1181 "g" ((USItype)(bh)), \ 1182 "1" ((USItype)(al)), \ 1183 "g" ((USItype)(bl))) 1184 #define umul_ppmm(xh, xl, m0, m1) \ 1185 do { \ 1186 union {UDItype __ll; \ 1187 struct {USItype __l, __h; } __i; \ 1188 } __xx; \ 1189 USItype __m0 = (m0), __m1 = (m1); \ 1190 __asm__ ("emul %1,%2,$0,%0" \ 1191 : "=g" (__xx.__ll) \ 1192 : "g" (__m0), \ 1193 "g" (__m1)); \ 1194 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1195 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1196 + (((SItype) __m1 >> 31) & __m0)); \ 1197 } while (0) 1198 #define sdiv_qrnnd(q, r, n1, n0, d) \ 1199 do { \ 1200 union {DItype __ll; \ 1201 struct {SItype __l, __h; } __i; \ 1202 } __xx; \ 1203 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1204 __asm__ ("ediv %3,%2,%0,%1" \ 1205 : "=g" (q), "=g" (r) \ 1206 : "g" (__xx.__ll), "g" (d)); \ 1207 } while (0) 1208 #endif /* __vax__ */ 1209 1210 /*************************************** 1211 ************** Z8000 **************** 1212 ***************************************/ 1213 #if defined(__z8000__) && W_TYPE_SIZE == 16 1214 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1215 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1216 : "=r" ((unsigned int)(sh)), \ 1217 "=&r" ((unsigned int)(sl)) \ 1218 : "%0" ((unsigned int)(ah)), \ 1219 "r" ((unsigned int)(bh)), \ 1220 "%1" ((unsigned int)(al)), \ 1221 "rQR" ((unsigned int)(bl))) 1222 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1223 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1224 : "=r" ((unsigned int)(sh)), \ 1225 "=&r" ((unsigned int)(sl)) \ 1226 : "0" ((unsigned int)(ah)), \ 1227 "r" ((unsigned int)(bh)), \ 1228 "1" ((unsigned int)(al)), \ 1229 "rQR" ((unsigned int)(bl))) 1230 #define umul_ppmm(xh, xl, m0, m1) \ 1231 do { \ 1232 union {long int __ll; \ 1233 struct {unsigned int __h, __l; } __i; \ 1234 } __xx; \ 1235 unsigned int __m0 = (m0), __m1 = (m1); \ 1236 __asm__ ("mult %S0,%H3" \ 1237 : "=r" (__xx.__i.__h), \ 1238 "=r" (__xx.__i.__l) \ 1239 : "%1" (__m0), \ 1240 "rQR" (__m1)); \ 1241 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1242 (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1243 + (((signed int) __m1 >> 15) & __m0)); \ 1244 } while (0) 1245 #endif /* __z8000__ */ 1246 1247 #endif /* __GNUC__ */ 1248 1249 /*************************************** 1250 *********** Generic Versions ******** 1251 ***************************************/ 1252 #if !defined(umul_ppmm) && defined(__umulsidi3) 1253 #define umul_ppmm(ph, pl, m0, m1) \ 1254 { \ 1255 UDWtype __ll = __umulsidi3(m0, m1); \ 1256 ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1257 pl = (UWtype) __ll; \ 1258 } 1259 #endif 1260 1261 #if !defined(__umulsidi3) 1262 #define __umulsidi3(u, v) \ 1263 ({UWtype __hi, __lo; \ 1264 umul_ppmm(__hi, __lo, u, v); \ 1265 ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1266 #endif 1267 1268 /* If this machine has no inline assembler, use C macros. */ 1269 1270 #if !defined(add_ssaaaa) 1271 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1272 do { \ 1273 UWtype __x; \ 1274 __x = (al) + (bl); \ 1275 (sh) = (ah) + (bh) + (__x < (al)); \ 1276 (sl) = __x; \ 1277 } while (0) 1278 #endif 1279 1280 #if !defined(sub_ddmmss) 1281 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1282 do { \ 1283 UWtype __x; \ 1284 __x = (al) - (bl); \ 1285 (sh) = (ah) - (bh) - (__x > (al)); \ 1286 (sl) = __x; \ 1287 } while (0) 1288 #endif 1289 1290 #if !defined(umul_ppmm) 1291 #define umul_ppmm(w1, w0, u, v) \ 1292 do { \ 1293 UWtype __x0, __x1, __x2, __x3; \ 1294 UHWtype __ul, __vl, __uh, __vh; \ 1295 UWtype __u = (u), __v = (v); \ 1296 \ 1297 __ul = __ll_lowpart(__u); \ 1298 __uh = __ll_highpart(__u); \ 1299 __vl = __ll_lowpart(__v); \ 1300 __vh = __ll_highpart(__v); \ 1301 \ 1302 __x0 = (UWtype) __ul * __vl; \ 1303 __x1 = (UWtype) __ul * __vh; \ 1304 __x2 = (UWtype) __uh * __vl; \ 1305 __x3 = (UWtype) __uh * __vh; \ 1306 \ 1307 __x1 += __ll_highpart(__x0);/* this can't give carry */ \ 1308 __x1 += __x2; /* but this indeed can */ \ 1309 if (__x1 < __x2) /* did we get it? */ \ 1310 __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1311 \ 1312 (w1) = __x3 + __ll_highpart(__x1); \ 1313 (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ 1314 } while (0) 1315 #endif 1316 1317 #if !defined(umul_ppmm) 1318 #define smul_ppmm(w1, w0, u, v) \ 1319 do { \ 1320 UWtype __w1; \ 1321 UWtype __m0 = (u), __m1 = (v); \ 1322 umul_ppmm(__w1, w0, __m0, __m1); \ 1323 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1324 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1325 } while (0) 1326 #endif 1327 1328 /* Define this unconditionally, so it can be used for debugging. */ 1329 #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1330 do { \ 1331 UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ 1332 __d1 = __ll_highpart(d); \ 1333 __d0 = __ll_lowpart(d); \ 1334 \ 1335 __r1 = (n1) % __d1; \ 1336 __q1 = (n1) / __d1; \ 1337 __m = (UWtype) __q1 * __d0; \ 1338 __r1 = __r1 * __ll_B | __ll_highpart(n0); \ 1339 if (__r1 < __m) { \ 1340 __q1--, __r1 += (d); \ 1341 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ 1342 if (__r1 < __m) \ 1343 __q1--, __r1 += (d); \ 1344 } \ 1345 __r1 -= __m; \ 1346 \ 1347 __r0 = __r1 % __d1; \ 1348 __q0 = __r1 / __d1; \ 1349 __m = (UWtype) __q0 * __d0; \ 1350 __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ 1351 if (__r0 < __m) { \ 1352 __q0--, __r0 += (d); \ 1353 if (__r0 >= (d)) \ 1354 if (__r0 < __m) \ 1355 __q0--, __r0 += (d); \ 1356 } \ 1357 __r0 -= __m; \ 1358 \ 1359 (q) = (UWtype) __q1 * __ll_B | __q0; \ 1360 (r) = __r0; \ 1361 } while (0) 1362 1363 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1364 __udiv_w_sdiv (defined in libgcc or elsewhere). */ 1365 #if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) 1366 #define udiv_qrnnd(q, r, nh, nl, d) \ 1367 do { \ 1368 UWtype __r; \ 1369 (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ 1370 (r) = __r; \ 1371 } while (0) 1372 #endif 1373 1374 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1375 #if !defined(udiv_qrnnd) 1376 #define UDIV_NEEDS_NORMALIZATION 1 1377 #define udiv_qrnnd __udiv_qrnnd_c 1378 #endif 1379 1380 #ifndef UDIV_NEEDS_NORMALIZATION 1381 #define UDIV_NEEDS_NORMALIZATION 0 1382 #endif 1383