1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 * Note: I added some stuff for use with gnupg 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, 5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 6 * 7 * This file is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Library General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or (at your 10 * option) any later version. 11 * 12 * This file is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 15 * License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this file; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, 20 * MA 02111-1307, USA. */ 21 22 #include <linux/count_zeros.h> 23 24 /* You have to define the following before including this file: 25 * 26 * UWtype -- An unsigned type, default type for operations (typically a "word") 27 * UHWtype -- An unsigned type, at least half the size of UWtype. 28 * UDWtype -- An unsigned type, at least twice as large a UWtype 29 * W_TYPE_SIZE -- size in bits of UWtype 30 * 31 * SItype, USItype -- Signed and unsigned 32 bit types. 32 * DItype, UDItype -- Signed and unsigned 64 bit types. 33 * 34 * On a 32 bit machine UWtype should typically be USItype; 35 * on a 64 bit machine, UWtype should typically be UDItype. 36 */ 37 38 #define __BITS4 (W_TYPE_SIZE / 4) 39 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 40 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 41 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 42 43 /* This is used to make sure no undesirable sharing between different libraries 44 that use this file takes place. 
*/ 45 #ifndef __MPN 46 #define __MPN(x) __##x 47 #endif 48 49 /* Define auxiliary asm macros. 50 * 51 * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two 52 * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype 53 * word product in HIGH_PROD and LOW_PROD. 54 * 55 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 56 * UDWtype product. This is just a variant of umul_ppmm. 57 58 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 59 * denominator) divides a UDWtype, composed by the UWtype integers 60 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 61 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 62 * than DENOMINATOR for correct operation. If, in addition, the most 63 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol 64 * UDIV_NEEDS_NORMALIZATION is defined to 1. 65 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 66 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient 67 * is rounded towards 0. 68 * 69 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the 70 * msb to the first non-zero bit in the UWtype X. This is the number of 71 * steps X needs to be shifted left to set the msb. Undefined for X == 0, 72 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 73 * 74 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 75 * from the least significant end. 76 * 77 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 78 * high_addend_2, low_addend_2) adds two UWtype integers, composed by 79 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 80 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 81 * (i.e. carry out) is not stored anywhere, and is lost. 
82 * 83 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 84 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 85 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 86 * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 87 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 88 * and is lost. 89 * 90 * If any of these macros are left undefined for a particular CPU, 91 * C macros are used. */ 92 93 /* The CPUs come in alphabetical order below. 94 * 95 * Please add support for more CPUs here, or improve the current support 96 * for the CPUs below! */ 97 98 #if defined(__GNUC__) && !defined(NO_ASM) 99 100 /* We sometimes need to clobber "cc" with gcc2, but that would not be 101 understood by gcc1. Use cpp to avoid major code duplication. */ 102 #if __GNUC__ < 2 103 #define __CLOBBER_CC 104 #define __AND_CLOBBER_CC 105 #else /* __GNUC__ >= 2 */ 106 #define __CLOBBER_CC : "cc" 107 #define __AND_CLOBBER_CC , "cc" 108 #endif /* __GNUC__ < 2 */ 109 110 /*************************************** 111 ************** A29K ***************** 112 ***************************************/ 113 #if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 114 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 115 __asm__ ("add %1,%4,%5\n" \ 116 "addc %0,%2,%3" \ 117 : "=r" ((USItype)(sh)), \ 118 "=&r" ((USItype)(sl)) \ 119 : "%r" ((USItype)(ah)), \ 120 "rI" ((USItype)(bh)), \ 121 "%r" ((USItype)(al)), \ 122 "rI" ((USItype)(bl))) 123 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 124 __asm__ ("sub %1,%4,%5\n" \ 125 "subc %0,%2,%3" \ 126 : "=r" ((USItype)(sh)), \ 127 "=&r" ((USItype)(sl)) \ 128 : "r" ((USItype)(ah)), \ 129 "rI" ((USItype)(bh)), \ 130 "r" ((USItype)(al)), \ 131 "rI" ((USItype)(bl))) 132 #define umul_ppmm(xh, xl, m0, m1) \ 133 do { \ 134 USItype __m0 = (m0), __m1 = (m1); \ 135 __asm__ ("multiplu %0,%1,%2" \ 136 : "=r" ((USItype)(xl)) \ 137 : "r" (__m0), \ 138 "r" 
(__m1)); \ 139 __asm__ ("multmu %0,%1,%2" \ 140 : "=r" ((USItype)(xh)) \ 141 : "r" (__m0), \ 142 "r" (__m1)); \ 143 } while (0) 144 #define udiv_qrnnd(q, r, n1, n0, d) \ 145 __asm__ ("dividu %0,%3,%4" \ 146 : "=r" ((USItype)(q)), \ 147 "=q" ((USItype)(r)) \ 148 : "1" ((USItype)(n1)), \ 149 "r" ((USItype)(n0)), \ 150 "r" ((USItype)(d))) 151 #endif /* __a29k__ */ 152 153 #if defined(__alpha) && W_TYPE_SIZE == 64 154 #define umul_ppmm(ph, pl, m0, m1) \ 155 do { \ 156 UDItype __m0 = (m0), __m1 = (m1); \ 157 (ph) = __builtin_alpha_umulh(__m0, __m1); \ 158 (pl) = __m0 * __m1; \ 159 } while (0) 160 #define UMUL_TIME 46 161 #ifndef LONGLONG_STANDALONE 162 #define udiv_qrnnd(q, r, n1, n0, d) \ 163 do { UDItype __r; \ 164 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 165 (r) = __r; \ 166 } while (0) 167 extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); 168 #define UDIV_TIME 220 169 #endif /* LONGLONG_STANDALONE */ 170 #endif /* __alpha */ 171 172 /*************************************** 173 ************** ARM ****************** 174 ***************************************/ 175 #if defined(__arm__) && W_TYPE_SIZE == 32 176 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 177 __asm__ ("adds %1, %4, %5\n" \ 178 "adc %0, %2, %3" \ 179 : "=r" (sh), \ 180 "=&r" (sl) \ 181 : "%r" ((USItype)(ah)), \ 182 "rI" ((USItype)(bh)), \ 183 "%r" ((USItype)(al)), \ 184 "rI" ((USItype)(bl))) 185 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 186 __asm__ ("subs %1, %4, %5\n" \ 187 "sbc %0, %2, %3" \ 188 : "=r" (sh), \ 189 "=&r" (sl) \ 190 : "r" ((USItype)(ah)), \ 191 "rI" ((USItype)(bh)), \ 192 "r" ((USItype)(al)), \ 193 "rI" ((USItype)(bl))) 194 #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ 195 #define umul_ppmm(xh, xl, a, b) \ 196 __asm__ ("@ Inlined umul_ppmm\n" \ 197 "mov %|r0, %2, lsr #16 @ AAAA\n" \ 198 "mov %|r2, %3, lsr #16 @ BBBB\n" \ 199 "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ 200 "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ 201 "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ 202 
"mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ 203 "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ 204 "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ 205 "adds %|r0, %1, %0 @ central sum\n" \ 206 "addcs %|r2, %|r2, #65536\n" \ 207 "adds %1, %|r1, %|r0, lsl #16\n" \ 208 "adc %0, %|r2, %|r0, lsr #16" \ 209 : "=&r" (xh), \ 210 "=r" (xl) \ 211 : "r" ((USItype)(a)), \ 212 "r" ((USItype)(b)) \ 213 : "r0", "r1", "r2") 214 #else 215 #define umul_ppmm(xh, xl, a, b) \ 216 __asm__ ("@ Inlined umul_ppmm\n" \ 217 "umull %1, %0, %2, %3" \ 218 : "=&r" (xh), \ 219 "=&r" (xl) \ 220 : "r" ((USItype)(a)), \ 221 "r" ((USItype)(b)) \ 222 : "r0", "r1") 223 #endif 224 #define UMUL_TIME 20 225 #define UDIV_TIME 100 226 #endif /* __arm__ */ 227 228 /*************************************** 229 ************** CLIPPER ************** 230 ***************************************/ 231 #if defined(__clipper__) && W_TYPE_SIZE == 32 232 #define umul_ppmm(w1, w0, u, v) \ 233 ({union {UDItype __ll; \ 234 struct {USItype __l, __h; } __i; \ 235 } __xx; \ 236 __asm__ ("mulwux %2,%0" \ 237 : "=r" (__xx.__ll) \ 238 : "%0" ((USItype)(u)), \ 239 "r" ((USItype)(v))); \ 240 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 241 #define smul_ppmm(w1, w0, u, v) \ 242 ({union {DItype __ll; \ 243 struct {SItype __l, __h; } __i; \ 244 } __xx; \ 245 __asm__ ("mulwx %2,%0" \ 246 : "=r" (__xx.__ll) \ 247 : "%0" ((SItype)(u)), \ 248 "r" ((SItype)(v))); \ 249 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 250 #define __umulsidi3(u, v) \ 251 ({UDItype __w; \ 252 __asm__ ("mulwux %2,%0" \ 253 : "=r" (__w) \ 254 : "%0" ((USItype)(u)), \ 255 "r" ((USItype)(v))); \ 256 __w; }) 257 #endif /* __clipper__ */ 258 259 /*************************************** 260 ************** GMICRO *************** 261 ***************************************/ 262 #if defined(__gmicro__) && W_TYPE_SIZE == 32 263 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 264 __asm__ ("add.w %5,%1\n" \ 265 "addx %3,%0" \ 266 : "=g" ((USItype)(sh)), \ 267 "=&g" ((USItype)(sl)) \ 268 : "%0" 
((USItype)(ah)), \ 269 "g" ((USItype)(bh)), \ 270 "%1" ((USItype)(al)), \ 271 "g" ((USItype)(bl))) 272 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 273 __asm__ ("sub.w %5,%1\n" \ 274 "subx %3,%0" \ 275 : "=g" ((USItype)(sh)), \ 276 "=&g" ((USItype)(sl)) \ 277 : "0" ((USItype)(ah)), \ 278 "g" ((USItype)(bh)), \ 279 "1" ((USItype)(al)), \ 280 "g" ((USItype)(bl))) 281 #define umul_ppmm(ph, pl, m0, m1) \ 282 __asm__ ("mulx %3,%0,%1" \ 283 : "=g" ((USItype)(ph)), \ 284 "=r" ((USItype)(pl)) \ 285 : "%0" ((USItype)(m0)), \ 286 "g" ((USItype)(m1))) 287 #define udiv_qrnnd(q, r, nh, nl, d) \ 288 __asm__ ("divx %4,%0,%1" \ 289 : "=g" ((USItype)(q)), \ 290 "=r" ((USItype)(r)) \ 291 : "1" ((USItype)(nh)), \ 292 "0" ((USItype)(nl)), \ 293 "g" ((USItype)(d))) 294 #endif 295 296 /*************************************** 297 ************** HPPA ***************** 298 ***************************************/ 299 #if defined(__hppa) && W_TYPE_SIZE == 32 300 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 301 __asm__ ("add %4,%5,%1\n" \ 302 "addc %2,%3,%0" \ 303 : "=r" ((USItype)(sh)), \ 304 "=&r" ((USItype)(sl)) \ 305 : "%rM" ((USItype)(ah)), \ 306 "rM" ((USItype)(bh)), \ 307 "%rM" ((USItype)(al)), \ 308 "rM" ((USItype)(bl))) 309 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 310 __asm__ ("sub %4,%5,%1\n" \ 311 "subb %2,%3,%0" \ 312 : "=r" ((USItype)(sh)), \ 313 "=&r" ((USItype)(sl)) \ 314 : "rM" ((USItype)(ah)), \ 315 "rM" ((USItype)(bh)), \ 316 "rM" ((USItype)(al)), \ 317 "rM" ((USItype)(bl))) 318 #if 0 && defined(_PA_RISC1_1) 319 /* xmpyu uses floating point register which is not allowed in Linux kernel. 
*/ 320 #define umul_ppmm(wh, wl, u, v) \ 321 do { \ 322 union {UDItype __ll; \ 323 struct {USItype __h, __l; } __i; \ 324 } __xx; \ 325 __asm__ ("xmpyu %1,%2,%0" \ 326 : "=*f" (__xx.__ll) \ 327 : "*f" ((USItype)(u)), \ 328 "*f" ((USItype)(v))); \ 329 (wh) = __xx.__i.__h; \ 330 (wl) = __xx.__i.__l; \ 331 } while (0) 332 #define UMUL_TIME 8 333 #define UDIV_TIME 60 334 #else 335 #define UMUL_TIME 40 336 #define UDIV_TIME 80 337 #endif 338 #if 0 /* #ifndef LONGLONG_STANDALONE */ 339 #define udiv_qrnnd(q, r, n1, n0, d) \ 340 do { USItype __r; \ 341 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 342 (r) = __r; \ 343 } while (0) 344 extern USItype __udiv_qrnnd(); 345 #endif /* LONGLONG_STANDALONE */ 346 #endif /* hppa */ 347 348 /*************************************** 349 ************** I370 ***************** 350 ***************************************/ 351 #if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 352 #define umul_ppmm(xh, xl, m0, m1) \ 353 do { \ 354 union {UDItype __ll; \ 355 struct {USItype __h, __l; } __i; \ 356 } __xx; \ 357 USItype __m0 = (m0), __m1 = (m1); \ 358 __asm__ ("mr %0,%3" \ 359 : "=r" (__xx.__i.__h), \ 360 "=r" (__xx.__i.__l) \ 361 : "%1" (__m0), \ 362 "r" (__m1)); \ 363 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 364 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 365 + (((SItype) __m1 >> 31) & __m0)); \ 366 } while (0) 367 #define smul_ppmm(xh, xl, m0, m1) \ 368 do { \ 369 union {DItype __ll; \ 370 struct {USItype __h, __l; } __i; \ 371 } __xx; \ 372 __asm__ ("mr %0,%3" \ 373 : "=r" (__xx.__i.__h), \ 374 "=r" (__xx.__i.__l) \ 375 : "%1" (m0), \ 376 "r" (m1)); \ 377 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 378 } while (0) 379 #define sdiv_qrnnd(q, r, n1, n0, d) \ 380 do { \ 381 union {DItype __ll; \ 382 struct {USItype __h, __l; } __i; \ 383 } __xx; \ 384 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 385 __asm__ ("dr %0,%2" \ 386 : "=r" (__xx.__ll) \ 387 : "0" (__xx.__ll), "r" (d)); \ 388 (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 389 } 
while (0) 390 #endif 391 392 /*************************************** 393 ************** I386 ***************** 394 ***************************************/ 395 #undef __i386__ 396 #if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 397 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 398 __asm__ ("addl %5,%1\n" \ 399 "adcl %3,%0" \ 400 : "=r" (sh), \ 401 "=&r" (sl) \ 402 : "%0" ((USItype)(ah)), \ 403 "g" ((USItype)(bh)), \ 404 "%1" ((USItype)(al)), \ 405 "g" ((USItype)(bl))) 406 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 407 __asm__ ("subl %5,%1\n" \ 408 "sbbl %3,%0" \ 409 : "=r" (sh), \ 410 "=&r" (sl) \ 411 : "0" ((USItype)(ah)), \ 412 "g" ((USItype)(bh)), \ 413 "1" ((USItype)(al)), \ 414 "g" ((USItype)(bl))) 415 #define umul_ppmm(w1, w0, u, v) \ 416 __asm__ ("mull %3" \ 417 : "=a" (w0), \ 418 "=d" (w1) \ 419 : "%0" ((USItype)(u)), \ 420 "rm" ((USItype)(v))) 421 #define udiv_qrnnd(q, r, n1, n0, d) \ 422 __asm__ ("divl %4" \ 423 : "=a" (q), \ 424 "=d" (r) \ 425 : "0" ((USItype)(n0)), \ 426 "1" ((USItype)(n1)), \ 427 "rm" ((USItype)(d))) 428 #ifndef UMUL_TIME 429 #define UMUL_TIME 40 430 #endif 431 #ifndef UDIV_TIME 432 #define UDIV_TIME 40 433 #endif 434 #endif /* 80x86 */ 435 436 /*************************************** 437 ************** I860 ***************** 438 ***************************************/ 439 #if defined(__i860__) && W_TYPE_SIZE == 32 440 #define rshift_rhlc(r, h, l, c) \ 441 __asm__ ("shr %3,r0,r0\n" \ 442 "shrd %1,%2,%0" \ 443 "=r" (r) : "r" (h), "r" (l), "rn" (c)) 444 #endif /* i860 */ 445 446 /*************************************** 447 ************** I960 ***************** 448 ***************************************/ 449 #if defined(__i960__) && W_TYPE_SIZE == 32 450 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 451 __asm__ ("cmpo 1,0\n" \ 452 "addc %5,%4,%1\n" \ 453 "addc %3,%2,%0" \ 454 : "=r" ((USItype)(sh)), \ 455 "=&r" ((USItype)(sl)) \ 456 : "%dI" ((USItype)(ah)), \ 457 "dI" ((USItype)(bh)), \ 458 "%dI" ((USItype)(al)), \ 
459 "dI" ((USItype)(bl))) 460 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 461 __asm__ ("cmpo 0,0\n" \ 462 "subc %5,%4,%1\n" \ 463 "subc %3,%2,%0" \ 464 : "=r" ((USItype)(sh)), \ 465 "=&r" ((USItype)(sl)) \ 466 : "dI" ((USItype)(ah)), \ 467 "dI" ((USItype)(bh)), \ 468 "dI" ((USItype)(al)), \ 469 "dI" ((USItype)(bl))) 470 #define umul_ppmm(w1, w0, u, v) \ 471 ({union {UDItype __ll; \ 472 struct {USItype __l, __h; } __i; \ 473 } __xx; \ 474 __asm__ ("emul %2,%1,%0" \ 475 : "=d" (__xx.__ll) \ 476 : "%dI" ((USItype)(u)), \ 477 "dI" ((USItype)(v))); \ 478 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 479 #define __umulsidi3(u, v) \ 480 ({UDItype __w; \ 481 __asm__ ("emul %2,%1,%0" \ 482 : "=d" (__w) \ 483 : "%dI" ((USItype)(u)), \ 484 "dI" ((USItype)(v))); \ 485 __w; }) 486 #define udiv_qrnnd(q, r, nh, nl, d) \ 487 do { \ 488 union {UDItype __ll; \ 489 struct {USItype __l, __h; } __i; \ 490 } __nn; \ 491 __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ 492 __asm__ ("ediv %d,%n,%0" \ 493 : "=d" (__rq.__ll) \ 494 : "dI" (__nn.__ll), \ 495 "dI" ((USItype)(d))); \ 496 (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ 497 } while (0) 498 #if defined(__i960mx) /* what is the proper symbol to test??? 
*/ 499 #define rshift_rhlc(r, h, l, c) \ 500 do { \ 501 union {UDItype __ll; \ 502 struct {USItype __l, __h; } __i; \ 503 } __nn; \ 504 __nn.__i.__h = (h); __nn.__i.__l = (l); \ 505 __asm__ ("shre %2,%1,%0" \ 506 : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ 507 } 508 #endif /* i960mx */ 509 #endif /* i960 */ 510 511 /*************************************** 512 ************** 68000 **************** 513 ***************************************/ 514 #if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 515 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 516 __asm__ ("add%.l %5,%1\n" \ 517 "addx%.l %3,%0" \ 518 : "=d" ((USItype)(sh)), \ 519 "=&d" ((USItype)(sl)) \ 520 : "%0" ((USItype)(ah)), \ 521 "d" ((USItype)(bh)), \ 522 "%1" ((USItype)(al)), \ 523 "g" ((USItype)(bl))) 524 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 525 __asm__ ("sub%.l %5,%1\n" \ 526 "subx%.l %3,%0" \ 527 : "=d" ((USItype)(sh)), \ 528 "=&d" ((USItype)(sl)) \ 529 : "0" ((USItype)(ah)), \ 530 "d" ((USItype)(bh)), \ 531 "1" ((USItype)(al)), \ 532 "g" ((USItype)(bl))) 533 #if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) 534 #define umul_ppmm(w1, w0, u, v) \ 535 __asm__ ("mulu%.l %3,%1:%0" \ 536 : "=d" ((USItype)(w0)), \ 537 "=d" ((USItype)(w1)) \ 538 : "%0" ((USItype)(u)), \ 539 "dmi" ((USItype)(v))) 540 #define UMUL_TIME 45 541 #define udiv_qrnnd(q, r, n1, n0, d) \ 542 __asm__ ("divu%.l %4,%1:%0" \ 543 : "=d" ((USItype)(q)), \ 544 "=d" ((USItype)(r)) \ 545 : "0" ((USItype)(n0)), \ 546 "1" ((USItype)(n1)), \ 547 "dmi" ((USItype)(d))) 548 #define UDIV_TIME 90 549 #define sdiv_qrnnd(q, r, n1, n0, d) \ 550 __asm__ ("divs%.l %4,%1:%0" \ 551 : "=d" ((USItype)(q)), \ 552 "=d" ((USItype)(r)) \ 553 : "0" ((USItype)(n0)), \ 554 "1" ((USItype)(n1)), \ 555 "dmi" ((USItype)(d))) 556 #else /* not mc68020 */ 557 #define umul_ppmm(xh, xl, a, b) \ 558 do { USItype __umul_tmp1, __umul_tmp2; \ 559 __asm__ ("| Inlined umul_ppmm\n" \ 560 "move%.l 
%5,%3\n" \ 561 "move%.l %2,%0\n" \ 562 "move%.w %3,%1\n" \ 563 "swap %3\n" \ 564 "swap %0\n" \ 565 "mulu %2,%1\n" \ 566 "mulu %3,%0\n" \ 567 "mulu %2,%3\n" \ 568 "swap %2\n" \ 569 "mulu %5,%2\n" \ 570 "add%.l %3,%2\n" \ 571 "jcc 1f\n" \ 572 "add%.l %#0x10000,%0\n" \ 573 "1: move%.l %2,%3\n" \ 574 "clr%.w %2\n" \ 575 "swap %2\n" \ 576 "swap %3\n" \ 577 "clr%.w %3\n" \ 578 "add%.l %3,%1\n" \ 579 "addx%.l %2,%0\n" \ 580 "| End inlined umul_ppmm" \ 581 : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 582 "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 583 : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 584 } while (0) 585 #define UMUL_TIME 100 586 #define UDIV_TIME 400 587 #endif /* not mc68020 */ 588 #endif /* mc68000 */ 589 590 /*************************************** 591 ************** 88000 **************** 592 ***************************************/ 593 #if defined(__m88000__) && W_TYPE_SIZE == 32 594 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 595 __asm__ ("addu.co %1,%r4,%r5\n" \ 596 "addu.ci %0,%r2,%r3" \ 597 : "=r" ((USItype)(sh)), \ 598 "=&r" ((USItype)(sl)) \ 599 : "%rJ" ((USItype)(ah)), \ 600 "rJ" ((USItype)(bh)), \ 601 "%rJ" ((USItype)(al)), \ 602 "rJ" ((USItype)(bl))) 603 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 604 __asm__ ("subu.co %1,%r4,%r5\n" \ 605 "subu.ci %0,%r2,%r3" \ 606 : "=r" ((USItype)(sh)), \ 607 "=&r" ((USItype)(sl)) \ 608 : "rJ" ((USItype)(ah)), \ 609 "rJ" ((USItype)(bh)), \ 610 "rJ" ((USItype)(al)), \ 611 "rJ" ((USItype)(bl))) 612 #if defined(__m88110__) 613 #define umul_ppmm(wh, wl, u, v) \ 614 do { \ 615 union {UDItype __ll; \ 616 struct {USItype __h, __l; } __i; \ 617 } __x; \ 618 __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ 619 (wh) = __x.__i.__h; \ 620 (wl) = __x.__i.__l; \ 621 } while (0) 622 #define udiv_qrnnd(q, r, n1, n0, d) \ 623 ({union {UDItype __ll; \ 624 struct {USItype __h, __l; } __i; \ 625 } __x, __q; \ 626 __x.__i.__h = (n1); __x.__i.__l = (n0); \ 627 __asm__ ("divu.d %0,%1,%2" \ 628 : "=r" 
(__q.__ll) : "r" (__x.__ll), "r" (d)); \ 629 (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) 630 #define UMUL_TIME 5 631 #define UDIV_TIME 25 632 #else 633 #define UMUL_TIME 17 634 #define UDIV_TIME 150 635 #endif /* __m88110__ */ 636 #endif /* __m88000__ */ 637 638 /*************************************** 639 ************** MIPS ***************** 640 ***************************************/ 641 #if defined(__mips__) && W_TYPE_SIZE == 32 642 #define umul_ppmm(w1, w0, u, v) \ 643 do { \ 644 UDItype __ll = (UDItype)(u) * (v); \ 645 w1 = __ll >> 32; \ 646 w0 = __ll; \ 647 } while (0) 648 #define UMUL_TIME 10 649 #define UDIV_TIME 100 650 #endif /* __mips__ */ 651 652 /*************************************** 653 ************** MIPS/64 ************** 654 ***************************************/ 655 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 656 #if defined(__mips_isa_rev) && __mips_isa_rev >= 6 657 /* 658 * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C 659 * code below, so we special case MIPS64r6 until the compiler can do better. 
660 */ 661 #define umul_ppmm(w1, w0, u, v) \ 662 do { \ 663 __asm__ ("dmulu %0,%1,%2" \ 664 : "=d" ((UDItype)(w0)) \ 665 : "d" ((UDItype)(u)), \ 666 "d" ((UDItype)(v))); \ 667 __asm__ ("dmuhu %0,%1,%2" \ 668 : "=d" ((UDItype)(w1)) \ 669 : "d" ((UDItype)(u)), \ 670 "d" ((UDItype)(v))); \ 671 } while (0) 672 #else 673 #define umul_ppmm(w1, w0, u, v) \ 674 do { \ 675 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ 676 __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ 677 w1 = __ll >> 64; \ 678 w0 = __ll; \ 679 } while (0) 680 #endif 681 #define UMUL_TIME 20 682 #define UDIV_TIME 140 683 #endif /* __mips__ */ 684 685 /*************************************** 686 ************** 32000 **************** 687 ***************************************/ 688 #if defined(__ns32000__) && W_TYPE_SIZE == 32 689 #define umul_ppmm(w1, w0, u, v) \ 690 ({union {UDItype __ll; \ 691 struct {USItype __l, __h; } __i; \ 692 } __xx; \ 693 __asm__ ("meid %2,%0" \ 694 : "=g" (__xx.__ll) \ 695 : "%0" ((USItype)(u)), \ 696 "g" ((USItype)(v))); \ 697 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 698 #define __umulsidi3(u, v) \ 699 ({UDItype __w; \ 700 __asm__ ("meid %2,%0" \ 701 : "=g" (__w) \ 702 : "%0" ((USItype)(u)), \ 703 "g" ((USItype)(v))); \ 704 __w; }) 705 #define udiv_qrnnd(q, r, n1, n0, d) \ 706 ({union {UDItype __ll; \ 707 struct {USItype __l, __h; } __i; \ 708 } __xx; \ 709 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 710 __asm__ ("deid %2,%0" \ 711 : "=g" (__xx.__ll) \ 712 : "0" (__xx.__ll), \ 713 "g" ((USItype)(d))); \ 714 (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 715 #endif /* __ns32000__ */ 716 717 /*************************************** 718 ************** PPC ****************** 719 ***************************************/ 720 #if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 721 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 722 do { \ 723 if (__builtin_constant_p(bh) && (bh) == 0) \ 724 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 725 : "=r" 
((USItype)(sh)), \ 726 "=&r" ((USItype)(sl)) \ 727 : "%r" ((USItype)(ah)), \ 728 "%r" ((USItype)(al)), \ 729 "rI" ((USItype)(bl))); \ 730 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 731 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 732 : "=r" ((USItype)(sh)), \ 733 "=&r" ((USItype)(sl)) \ 734 : "%r" ((USItype)(ah)), \ 735 "%r" ((USItype)(al)), \ 736 "rI" ((USItype)(bl))); \ 737 else \ 738 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 739 : "=r" ((USItype)(sh)), \ 740 "=&r" ((USItype)(sl)) \ 741 : "%r" ((USItype)(ah)), \ 742 "r" ((USItype)(bh)), \ 743 "%r" ((USItype)(al)), \ 744 "rI" ((USItype)(bl))); \ 745 } while (0) 746 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 747 do { \ 748 if (__builtin_constant_p(ah) && (ah) == 0) \ 749 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 750 : "=r" ((USItype)(sh)), \ 751 "=&r" ((USItype)(sl)) \ 752 : "r" ((USItype)(bh)), \ 753 "rI" ((USItype)(al)), \ 754 "r" ((USItype)(bl))); \ 755 else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ 756 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 757 : "=r" ((USItype)(sh)), \ 758 "=&r" ((USItype)(sl)) \ 759 : "r" ((USItype)(bh)), \ 760 "rI" ((USItype)(al)), \ 761 "r" ((USItype)(bl))); \ 762 else if (__builtin_constant_p(bh) && (bh) == 0) \ 763 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 764 : "=r" ((USItype)(sh)), \ 765 "=&r" ((USItype)(sl)) \ 766 : "r" ((USItype)(ah)), \ 767 "rI" ((USItype)(al)), \ 768 "r" ((USItype)(bl))); \ 769 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 770 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 771 : "=r" ((USItype)(sh)), \ 772 "=&r" ((USItype)(sl)) \ 773 : "r" ((USItype)(ah)), \ 774 "rI" ((USItype)(al)), \ 775 "r" ((USItype)(bl))); \ 776 else \ 777 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 778 : "=r" ((USItype)(sh)), \ 779 "=&r" ((USItype)(sl)) \ 780 : "r" ((USItype)(ah)), \ 781 "r" ((USItype)(bh)), \ 782 "rI" 
((USItype)(al)), \ 783 "r" ((USItype)(bl))); \ 784 } while (0) 785 #if defined(_ARCH_PPC) 786 #define umul_ppmm(ph, pl, m0, m1) \ 787 do { \ 788 USItype __m0 = (m0), __m1 = (m1); \ 789 __asm__ ("mulhwu %0,%1,%2" \ 790 : "=r" ((USItype) ph) \ 791 : "%r" (__m0), \ 792 "r" (__m1)); \ 793 (pl) = __m0 * __m1; \ 794 } while (0) 795 #define UMUL_TIME 15 796 #define smul_ppmm(ph, pl, m0, m1) \ 797 do { \ 798 SItype __m0 = (m0), __m1 = (m1); \ 799 __asm__ ("mulhw %0,%1,%2" \ 800 : "=r" ((SItype) ph) \ 801 : "%r" (__m0), \ 802 "r" (__m1)); \ 803 (pl) = __m0 * __m1; \ 804 } while (0) 805 #define SMUL_TIME 14 806 #define UDIV_TIME 120 807 #else 808 #define umul_ppmm(xh, xl, m0, m1) \ 809 do { \ 810 USItype __m0 = (m0), __m1 = (m1); \ 811 __asm__ ("mul %0,%2,%3" \ 812 : "=r" ((USItype)(xh)), \ 813 "=q" ((USItype)(xl)) \ 814 : "r" (__m0), \ 815 "r" (__m1)); \ 816 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 817 + (((SItype) __m1 >> 31) & __m0)); \ 818 } while (0) 819 #define UMUL_TIME 8 820 #define smul_ppmm(xh, xl, m0, m1) \ 821 __asm__ ("mul %0,%2,%3" \ 822 : "=r" ((SItype)(xh)), \ 823 "=q" ((SItype)(xl)) \ 824 : "r" (m0), \ 825 "r" (m1)) 826 #define SMUL_TIME 4 827 #define sdiv_qrnnd(q, r, nh, nl, d) \ 828 __asm__ ("div %0,%2,%4" \ 829 : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 830 : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 831 #define UDIV_TIME 100 832 #endif 833 #endif /* Power architecture variants. 
*/ 834 835 /*************************************** 836 ************** PYR ****************** 837 ***************************************/ 838 #if defined(__pyr__) && W_TYPE_SIZE == 32 839 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 840 __asm__ ("addw %5,%1\n" \ 841 "addwc %3,%0" \ 842 : "=r" ((USItype)(sh)), \ 843 "=&r" ((USItype)(sl)) \ 844 : "%0" ((USItype)(ah)), \ 845 "g" ((USItype)(bh)), \ 846 "%1" ((USItype)(al)), \ 847 "g" ((USItype)(bl))) 848 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 849 __asm__ ("subw %5,%1\n" \ 850 "subwb %3,%0" \ 851 : "=r" ((USItype)(sh)), \ 852 "=&r" ((USItype)(sl)) \ 853 : "0" ((USItype)(ah)), \ 854 "g" ((USItype)(bh)), \ 855 "1" ((USItype)(al)), \ 856 "g" ((USItype)(bl))) 857 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ 858 #define umul_ppmm(w1, w0, u, v) \ 859 ({union {UDItype __ll; \ 860 struct {USItype __h, __l; } __i; \ 861 } __xx; \ 862 __asm__ ("movw %1,%R0\n" \ 863 "uemul %2,%0" \ 864 : "=&r" (__xx.__ll) \ 865 : "g" ((USItype) (u)), \ 866 "g" ((USItype)(v))); \ 867 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 868 #endif /* __pyr__ */ 869 870 /*************************************** 871 ************** RT/ROMP ************** 872 ***************************************/ 873 #if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 874 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 875 __asm__ ("a %1,%5\n" \ 876 "ae %0,%3" \ 877 : "=r" ((USItype)(sh)), \ 878 "=&r" ((USItype)(sl)) \ 879 : "%0" ((USItype)(ah)), \ 880 "r" ((USItype)(bh)), \ 881 "%1" ((USItype)(al)), \ 882 "r" ((USItype)(bl))) 883 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 884 __asm__ ("s %1,%5\n" \ 885 "se %0,%3" \ 886 : "=r" ((USItype)(sh)), \ 887 "=&r" ((USItype)(sl)) \ 888 : "0" ((USItype)(ah)), \ 889 "r" ((USItype)(bh)), \ 890 "1" ((USItype)(al)), \ 891 "r" ((USItype)(bl))) 892 #define umul_ppmm(ph, pl, m0, m1) \ 893 do { \ 894 USItype __m0 = (m0), __m1 = (m1); \ 895 __asm__ ( \ 896 "s r2,r2\n" \ 897 "mts r10,%2\n" \ 898 "m 
r2,%3\n" \ 899 "m r2,%3\n" \ 900 "m r2,%3\n" \ 901 "m r2,%3\n" \ 902 "m r2,%3\n" \ 903 "m r2,%3\n" \ 904 "m r2,%3\n" \ 905 "m r2,%3\n" \ 906 "m r2,%3\n" \ 907 "m r2,%3\n" \ 908 "m r2,%3\n" \ 909 "m r2,%3\n" \ 910 "m r2,%3\n" \ 911 "m r2,%3\n" \ 912 "m r2,%3\n" \ 913 "m r2,%3\n" \ 914 "cas %0,r2,r0\n" \ 915 "mfs r10,%1" \ 916 : "=r" ((USItype)(ph)), \ 917 "=r" ((USItype)(pl)) \ 918 : "%r" (__m0), \ 919 "r" (__m1) \ 920 : "r2"); \ 921 (ph) += ((((SItype) __m0 >> 31) & __m1) \ 922 + (((SItype) __m1 >> 31) & __m0)); \ 923 } while (0) 924 #define UMUL_TIME 20 925 #define UDIV_TIME 200 926 #endif /* RT/ROMP */ 927 928 /*************************************** 929 ************** SH2 ****************** 930 ***************************************/ 931 #if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ 932 && W_TYPE_SIZE == 32 933 #define umul_ppmm(w1, w0, u, v) \ 934 __asm__ ( \ 935 "dmulu.l %2,%3\n" \ 936 "sts macl,%1\n" \ 937 "sts mach,%0" \ 938 : "=r" ((USItype)(w1)), \ 939 "=r" ((USItype)(w0)) \ 940 : "r" ((USItype)(u)), \ 941 "r" ((USItype)(v)) \ 942 : "macl", "mach") 943 #define UMUL_TIME 5 944 #endif 945 946 /*************************************** 947 ************** SPARC **************** 948 ***************************************/ 949 #if defined(__sparc__) && W_TYPE_SIZE == 32 950 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 951 __asm__ ("addcc %r4,%5,%1\n" \ 952 "addx %r2,%3,%0" \ 953 : "=r" ((USItype)(sh)), \ 954 "=&r" ((USItype)(sl)) \ 955 : "%rJ" ((USItype)(ah)), \ 956 "rI" ((USItype)(bh)), \ 957 "%rJ" ((USItype)(al)), \ 958 "rI" ((USItype)(bl)) \ 959 __CLOBBER_CC) 960 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 961 __asm__ ("subcc %r4,%5,%1\n" \ 962 "subx %r2,%3,%0" \ 963 : "=r" ((USItype)(sh)), \ 964 "=&r" ((USItype)(sl)) \ 965 : "rJ" ((USItype)(ah)), \ 966 "rI" ((USItype)(bh)), \ 967 "rJ" ((USItype)(al)), \ 968 "rI" ((USItype)(bl)) \ 969 __CLOBBER_CC) 970 #if defined(__sparc_v8__) 971 /* Don't match immediate range because, 1) it is not often 
useful,
   2) the 'I' flag thinks of the range as a 13 bit signed interval,
   while we want to match a 13 bit interval, sign extended to 32 bits,
   but INTERPRETED AS UNSIGNED.

   umul_ppmm (SPARC v8): "umul" of u (%2) and v (%3) writes the low word
   to w0 (%1); the high word is then read from the %y register into
   w1 (%0).  All operands are register-only, per the note above.  */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	  "r" ((USItype)(v)))
#define UMUL_TIME 5
#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
/* udiv_qrnnd (SPARC v8): n1 is moved into %y, three nops follow, then "udiv"
 * divides by d with n0 as the register operand and leaves the quotient in
 * __q.  The remainder is not produced by the asm; it is computed in C as
 * n0 - __q * d.  n0 and d are therefore evaluated twice.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	do { \
		USItype __q; \
		__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
		: "=r" ((USItype)(__q)) \
		: "r" ((USItype)(n1)), \
		  "r" ((USItype)(n0)), \
		  "r" ((USItype)(d))); \
		(r) = (n0) - __q * (d); \
		(q) = __q; \
	} while (0)
#define UDIV_TIME 25
#endif /* SUPERSPARC */
#else /* ! __sparc_v8__ */
#if defined(__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.

   umul_ppmm here is the same umul / rd %y sequence used for v8: low word
   to w0 (%1), high word read from %y into w1 (%0).  */
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "r" ((USItype)(u)), \
	  "r" ((USItype)(v)))
#define UMUL_TIME 5
/* udiv_qrnnd (sparclite): n1 (%2) is written to %y, then 32 "divscc" steps
 * are issued against d (%4).  The first step starts from n0 (%3), the
 * intermediate steps accumulate in %g1, and the last step writes q (%0).
 * The remainder is read back from %y into r (%1), followed by a conditional
 * "add r,d,r" in the slot of the annulling "bl,a" branch.  %g1 and the
 * condition codes are clobbered.
 */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n" \
	"wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
	"tst %%g0\n" \
	"divscc %3,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%%g1\n" \
	"divscc %%g1,%4,%0\n" \
	"rd %%y,%1\n" \
	"bl,a 1f\n" \
	"add %1,%4,%1\n" \
	"1: ! End of inline udiv_qrnnd" \
	: "=r" ((USItype)(q)), \
	  "=r" ((USItype)(r)) \
	: "r" ((USItype)(n1)), \
	  "r" ((USItype)(n0)), \
	  "rI" ((USItype)(d)) \
	: "%g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.

   umul_ppmm (v7): u (%2) is written to %y, %g2 is set to u masked by the
   arithmetic sign-spread of v (sra %3,31 then and), and %g1 is cleared.
   32 "mulscc" steps with v follow, plus a final step with 0.  The high
   word w1 (%0) is %g1 + %g2 and the low word w0 (%1) is read from %y.
   %g1, %g2 and the condition codes are clobbered.  */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
	__asm__ ("! Inlined umul_ppmm\n" \
	"wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
	"sra %3,31,%%g2 ! Don't move this insn\n" \
	"and %2,%%g2,%%g2 ! Don't move this insn\n" \
	"andcc %%g0,0,%%g1 ! Don't move this insn\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,%3,%%g1\n" \
	"mulscc %%g1,0,%%g1\n" \
	"add %%g1,%%g2,%0\n" \
	"rd %%y,%1" \
	: "=r" ((USItype)(w1)), \
	  "=r" ((USItype)(w0)) \
	: "%rI" ((USItype)(u)), \
	  "r" ((USItype)(v)) \
	: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!

   Operand layout: q (%0) is tied to n0 and r (%1) is tied to n1 through
   the "0" / "1" matching constraints, so the numerator is shifted in
   place; d is %2.  %g1 is loaded with 32 and counted down with subcc.
   The closing "xnor %0,0,%0" complements the bits accumulated in q.
   Both outputs are early-clobber; %g1 and cc are clobbered.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
	__asm__ ("! Inlined udiv_qrnnd\n\t" \
	"mov 32,%%g1\n\t" \
	"subcc %1,%2,%%g0\n\t" \
	"1: bcs 5f\n\t" \
	"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
	"sub %1,%2,%1 ! this kills msb of n\n\t" \
	"addx %1,%1,%1 ! so this can't give carry\n\t" \
	"subcc %%g1,1,%%g1\n\t" \
	"2: bne 1b\n\t" \
	"subcc %1,%2,%%g0\n\t" \
	"bcs 3f\n\t" \
	"addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
	"b 3f\n\t" \
	"sub %1,%2,%1 ! this kills msb of n\n\t" \
	"4: sub %1,%2,%1\n\t" \
	"5: addxcc %1,%1,%1\n\t" \
	"bcc 2b\n\t" \
	"subcc %%g1,1,%%g1\n\t" \
	"! Got carry from n. Subtract next step to cancel this carry.\n\t" \
	"bne 4b\n\t" \
	"addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
	"sub %1,%2,%1\n\t" \
	"3: xnor %0,0,%0\n\t" \
	"! End of inline udiv_qrnnd\n" \
	: "=&r" ((USItype)(q)), \
	  "=&r" ((USItype)(r)) \
	: "r" ((USItype)(d)), \
	  "1" ((USItype)(n1)), \
	  "0" ((USItype)(n0)) : "%g1", "cc")
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
#endif
#endif /* __sparc__ */

/***************************************
 **************  VAX  ******************
 ***************************************/
#if defined(__vax__) && W_TYPE_SIZE == 32
/* Two-word add: sh is tied to ah ("%0") and sl to al ("%1"), so "addl2"
 * adds bl (%5) into the low word in place and "adwc" then adds bh (%3)
 * into the high word.  sl is early-clobber.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	__asm__ ("addl2 %5,%1\n" \
	"adwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "%0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "%1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
/* Two-word subtract with the same tied-operand layout as add_ssaaaa:
 * "subl2" on the low word, then "sbwc" on the high word.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	__asm__ ("subl2 %5,%1\n" \
	"sbwc %3,%0" \
	: "=g" ((USItype)(sh)), \
	  "=&g" ((USItype)(sl)) \
	: "0" ((USItype)(ah)), \
	  "g" ((USItype)(bh)), \
	  "1" ((USItype)(al)), \
	  "g" ((USItype)(bl)))
/* umul_ppmm (VAX): "emul" writes a 64-bit result into the __ll member of a
 * union whose other view is the pair { __l, __h }; xh and xl are taken from
 * that pair.  m0 and m1 are copied to locals first, so each is evaluated
 * once.
 */
#define umul_ppmm(xh, xl, m0, m1) \
	do { \
		union {UDItype __ll; \
			struct {USItype __l, __h; } __i; \
		} __xx; \
		USItype __m0 = (m0), __m1 = (m1); \
		__asm__ ("emul %1,%2,$0,%0" \
		: "=g" (__xx.__ll) \
		: "g" (__m0), \
		  "g" (__m1)); \
		(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
		/* Add __m1 when bit 31 of __m0 is set and __m0 when bit 31 */ \
		/* of __m1 is set.  NOTE(review): presumably converts the */ \
		/* signed product to the unsigned high word -- confirm. */ \
		(xh) += ((((SItype) __m0 >> 31) & __m1) \
			+ (((SItype) __m1 >> 31) & __m0)); \
	} while (0)
/* sdiv_qrnnd (VAX): n1 and n0 are packed into the high and low halves of a
 * signed 64-bit union, which is then handed to "ediv" together with d; the
 * asm writes q and r directly.
 */
#define sdiv_qrnnd(q, r, n1, n0, d) \
	do { \
		union {DItype __ll; \
			struct {SItype __l, __h; } __i; \
		} __xx; \
		__xx.__i.__h = n1; \
__xx.__i.__l = n0; \ 1183 __asm__ ("ediv %3,%2,%0,%1" \ 1184 : "=g" (q), "=g" (r) \ 1185 : "g" (__xx.__ll), "g" (d)); \ 1186 } while (0) 1187 #endif /* __vax__ */ 1188 1189 /*************************************** 1190 ************** Z8000 **************** 1191 ***************************************/ 1192 #if defined(__z8000__) && W_TYPE_SIZE == 16 1193 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1194 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1195 : "=r" ((unsigned int)(sh)), \ 1196 "=&r" ((unsigned int)(sl)) \ 1197 : "%0" ((unsigned int)(ah)), \ 1198 "r" ((unsigned int)(bh)), \ 1199 "%1" ((unsigned int)(al)), \ 1200 "rQR" ((unsigned int)(bl))) 1201 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1202 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1203 : "=r" ((unsigned int)(sh)), \ 1204 "=&r" ((unsigned int)(sl)) \ 1205 : "0" ((unsigned int)(ah)), \ 1206 "r" ((unsigned int)(bh)), \ 1207 "1" ((unsigned int)(al)), \ 1208 "rQR" ((unsigned int)(bl))) 1209 #define umul_ppmm(xh, xl, m0, m1) \ 1210 do { \ 1211 union {long int __ll; \ 1212 struct {unsigned int __h, __l; } __i; \ 1213 } __xx; \ 1214 unsigned int __m0 = (m0), __m1 = (m1); \ 1215 __asm__ ("mult %S0,%H3" \ 1216 : "=r" (__xx.__i.__h), \ 1217 "=r" (__xx.__i.__l) \ 1218 : "%1" (__m0), \ 1219 "rQR" (__m1)); \ 1220 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1221 (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1222 + (((signed int) __m1 >> 15) & __m0)); \ 1223 } while (0) 1224 #endif /* __z8000__ */ 1225 1226 #endif /* __GNUC__ */ 1227 1228 /*************************************** 1229 *********** Generic Versions ******** 1230 ***************************************/ 1231 #if !defined(umul_ppmm) && defined(__umulsidi3) 1232 #define umul_ppmm(ph, pl, m0, m1) \ 1233 { \ 1234 UDWtype __ll = __umulsidi3(m0, m1); \ 1235 ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1236 pl = (UWtype) __ll; \ 1237 } 1238 #endif 1239 1240 #if !defined(__umulsidi3) 1241 #define __umulsidi3(u, v) \ 1242 ({UWtype __hi, __lo; \ 1243 umul_ppmm(__hi, __lo, u, 
v); \ 1244 ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1245 #endif 1246 1247 /* If this machine has no inline assembler, use C macros. */ 1248 1249 #if !defined(add_ssaaaa) 1250 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1251 do { \ 1252 UWtype __x; \ 1253 __x = (al) + (bl); \ 1254 (sh) = (ah) + (bh) + (__x < (al)); \ 1255 (sl) = __x; \ 1256 } while (0) 1257 #endif 1258 1259 #if !defined(sub_ddmmss) 1260 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1261 do { \ 1262 UWtype __x; \ 1263 __x = (al) - (bl); \ 1264 (sh) = (ah) - (bh) - (__x > (al)); \ 1265 (sl) = __x; \ 1266 } while (0) 1267 #endif 1268 1269 #if !defined(umul_ppmm) 1270 #define umul_ppmm(w1, w0, u, v) \ 1271 do { \ 1272 UWtype __x0, __x1, __x2, __x3; \ 1273 UHWtype __ul, __vl, __uh, __vh; \ 1274 UWtype __u = (u), __v = (v); \ 1275 \ 1276 __ul = __ll_lowpart(__u); \ 1277 __uh = __ll_highpart(__u); \ 1278 __vl = __ll_lowpart(__v); \ 1279 __vh = __ll_highpart(__v); \ 1280 \ 1281 __x0 = (UWtype) __ul * __vl; \ 1282 __x1 = (UWtype) __ul * __vh; \ 1283 __x2 = (UWtype) __uh * __vl; \ 1284 __x3 = (UWtype) __uh * __vh; \ 1285 \ 1286 __x1 += __ll_highpart(__x0);/* this can't give carry */ \ 1287 __x1 += __x2; /* but this indeed can */ \ 1288 if (__x1 < __x2) /* did we get it? */ \ 1289 __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1290 \ 1291 (w1) = __x3 + __ll_highpart(__x1); \ 1292 (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ 1293 } while (0) 1294 #endif 1295 1296 #if !defined(umul_ppmm) 1297 #define smul_ppmm(w1, w0, u, v) \ 1298 do { \ 1299 UWtype __w1; \ 1300 UWtype __m0 = (u), __m1 = (v); \ 1301 umul_ppmm(__w1, w0, __m0, __m1); \ 1302 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1303 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1304 } while (0) 1305 #endif 1306 1307 /* Define this unconditionally, so it can be used for debugging. 
   __udiv_qrnnd_c(q, r, n1, n0, d) divides the two-word value n1:n0 by d
   in plain C, storing the quotient in q and the remainder in r.

   d is split into half-words __d1 (high) and __d0 (low).  The quotient is
   produced as two half-word digits: __q1 from n1 and the high half of n0,
   then __q0 from the running remainder and the low half of n0.  Each
   digit is first estimated by dividing by __d1 alone and then corrected
   downwards (at most twice) when the estimate times __d0 exceeds the
   partial remainder.

   d, n1 and n0 are evaluated more than once, so they must be free of
   side effects.  When this macro is selected as udiv_qrnnd further down,
   UDIV_NEEDS_NORMALIZATION is set to 1.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
	do { \
		UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
		__d1 = __ll_highpart(d); \
		__d0 = __ll_lowpart(d); \
		\
		/* High quotient digit: estimate with __d1, then correct. */ \
		__r1 = (n1) % __d1; \
		__q1 = (n1) / __d1; \
		__m = (UWtype) __q1 * __d0; \
		__r1 = __r1 * __ll_B | __ll_highpart(n0); \
		if (__r1 < __m) { \
			__q1--, __r1 += (d); \
			if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
				if (__r1 < __m) \
					__q1--, __r1 += (d); \
		} \
		__r1 -= __m; \
		\
		/* Low quotient digit: same estimate-and-correct step. */ \
		__r0 = __r1 % __d1; \
		__q0 = __r1 / __d1; \
		__m = (UWtype) __q0 * __d0; \
		__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
		if (__r0 < __m) { \
			__q0--, __r0 += (d); \
			if (__r0 >= (d)) \
				if (__r0 < __m) \
					__q0--, __r0 += (d); \
		} \
		__r0 -= __m; \
		\
		/* Combine the two half-word digits into the full quotient. */ \
		(q) = (UWtype) __q1 * __ll_B | __q0; \
		(r) = __r0; \
	} while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The helper is reached
   through __MPN(), returns the quotient, and stores the remainder through
   its first (pointer) argument.  */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
	do { \
		UWtype __r; \
		(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
		(r) = __r; \
	} while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   That fallback is flagged as needing normalization.  */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Every other udiv_qrnnd that did not set the flag itself gets 0 here. */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif