1 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 * Note: I added some stuff for use with gnupg 3 * 4 * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, 5 * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 6 * 7 * This file is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU Library General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or (at your 10 * option) any later version. 11 * 12 * This file is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 15 * License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this file; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, 20 * MA 02111-1307, USA. */ 21 22 #include <asm-generic/bitops/count_zeros.h> 23 24 /* You have to define the following before including this file: 25 * 26 * UWtype -- An unsigned type, default type for operations (typically a "word") 27 * UHWtype -- An unsigned type, at least half the size of UWtype. 28 * UDWtype -- An unsigned type, at least twice as large a UWtype 29 * W_TYPE_SIZE -- size in bits of UWtype 30 * 31 * SItype, USItype -- Signed and unsigned 32 bit types. 32 * DItype, UDItype -- Signed and unsigned 64 bit types. 33 * 34 * On a 32 bit machine UWtype should typically be USItype; 35 * on a 64 bit machine, UWtype should typically be UDItype. 36 */ 37 38 #define __BITS4 (W_TYPE_SIZE / 4) 39 #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 40 #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 41 #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 42 43 /* This is used to make sure no undesirable sharing between different libraries 44 that use this file takes place. 
*/ 45 #ifndef __MPN 46 #define __MPN(x) __##x 47 #endif 48 49 /* Define auxiliary asm macros. 50 * 51 * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two 52 * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype 53 * word product in HIGH_PROD and LOW_PROD. 54 * 55 * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 56 * UDWtype product. This is just a variant of umul_ppmm. 57 58 * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 59 * denominator) divides a UDWtype, composed by the UWtype integers 60 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 61 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 62 * than DENOMINATOR for correct operation. If, in addition, the most 63 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol 64 * UDIV_NEEDS_NORMALIZATION is defined to 1. 65 * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 66 * denominator). Like udiv_qrnnd but the numbers are signed. The quotient 67 * is rounded towards 0. 68 * 69 * 5) count_leading_zeros(count, x) counts the number of zero-bits from the 70 * msb to the first non-zero bit in the UWtype X. This is the number of 71 * steps X needs to be shifted left to set the msb. Undefined for X == 0, 72 * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 73 * 74 * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 75 * from the least significant end. 76 * 77 * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 78 * high_addend_2, low_addend_2) adds two UWtype integers, composed by 79 * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 80 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 81 * (i.e. carry out) is not stored anywhere, and is lost. 
82 * 83 * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 84 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 85 * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 86 * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 87 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 88 * and is lost. 89 * 90 * If any of these macros are left undefined for a particular CPU, 91 * C macros are used. */ 92 93 /* The CPUs come in alphabetical order below. 94 * 95 * Please add support for more CPUs here, or improve the current support 96 * for the CPUs below! */ 97 98 #if defined(__GNUC__) && !defined(NO_ASM) 99 100 /* We sometimes need to clobber "cc" with gcc2, but that would not be 101 understood by gcc1. Use cpp to avoid major code duplication. */ 102 #if __GNUC__ < 2 103 #define __CLOBBER_CC 104 #define __AND_CLOBBER_CC 105 #else /* __GNUC__ >= 2 */ 106 #define __CLOBBER_CC : "cc" 107 #define __AND_CLOBBER_CC , "cc" 108 #endif /* __GNUC__ < 2 */ 109 110 /*************************************** 111 ************** A29K ***************** 112 ***************************************/ 113 #if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 114 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 115 __asm__ ("add %1,%4,%5\n" \ 116 "addc %0,%2,%3" \ 117 : "=r" ((USItype)(sh)), \ 118 "=&r" ((USItype)(sl)) \ 119 : "%r" ((USItype)(ah)), \ 120 "rI" ((USItype)(bh)), \ 121 "%r" ((USItype)(al)), \ 122 "rI" ((USItype)(bl))) 123 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 124 __asm__ ("sub %1,%4,%5\n" \ 125 "subc %0,%2,%3" \ 126 : "=r" ((USItype)(sh)), \ 127 "=&r" ((USItype)(sl)) \ 128 : "r" ((USItype)(ah)), \ 129 "rI" ((USItype)(bh)), \ 130 "r" ((USItype)(al)), \ 131 "rI" ((USItype)(bl))) 132 #define umul_ppmm(xh, xl, m0, m1) \ 133 do { \ 134 USItype __m0 = (m0), __m1 = (m1); \ 135 __asm__ ("multiplu %0,%1,%2" \ 136 : "=r" ((USItype)(xl)) \ 137 : "r" (__m0), \ 138 "r" 
(__m1)); \ 139 __asm__ ("multmu %0,%1,%2" \ 140 : "=r" ((USItype)(xh)) \ 141 : "r" (__m0), \ 142 "r" (__m1)); \ 143 } while (0) 144 #define udiv_qrnnd(q, r, n1, n0, d) \ 145 __asm__ ("dividu %0,%3,%4" \ 146 : "=r" ((USItype)(q)), \ 147 "=q" ((USItype)(r)) \ 148 : "1" ((USItype)(n1)), \ 149 "r" ((USItype)(n0)), \ 150 "r" ((USItype)(d))) 151 #endif /* __a29k__ */ 152 153 #if defined(__alpha) && W_TYPE_SIZE == 64 154 #define umul_ppmm(ph, pl, m0, m1) \ 155 do { \ 156 UDItype __m0 = (m0), __m1 = (m1); \ 157 (ph) = __builtin_alpha_umulh(__m0, __m1); \ 158 (pl) = __m0 * __m1; \ 159 } while (0) 160 #define UMUL_TIME 46 161 #ifndef LONGLONG_STANDALONE 162 #define udiv_qrnnd(q, r, n1, n0, d) \ 163 do { UDItype __r; \ 164 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 165 (r) = __r; \ 166 } while (0) 167 extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); 168 #define UDIV_TIME 220 169 #endif /* LONGLONG_STANDALONE */ 170 #endif /* __alpha */ 171 172 /*************************************** 173 ************** ARM ****************** 174 ***************************************/ 175 #if defined(__arm__) && W_TYPE_SIZE == 32 176 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 177 __asm__ ("adds %1, %4, %5\n" \ 178 "adc %0, %2, %3" \ 179 : "=r" ((USItype)(sh)), \ 180 "=&r" ((USItype)(sl)) \ 181 : "%r" ((USItype)(ah)), \ 182 "rI" ((USItype)(bh)), \ 183 "%r" ((USItype)(al)), \ 184 "rI" ((USItype)(bl))) 185 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 186 __asm__ ("subs %1, %4, %5\n" \ 187 "sbc %0, %2, %3" \ 188 : "=r" ((USItype)(sh)), \ 189 "=&r" ((USItype)(sl)) \ 190 : "r" ((USItype)(ah)), \ 191 "rI" ((USItype)(bh)), \ 192 "r" ((USItype)(al)), \ 193 "rI" ((USItype)(bl))) 194 #if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ 195 #define umul_ppmm(xh, xl, a, b) \ 196 __asm__ ("%@ Inlined umul_ppmm\n" \ 197 "mov %|r0, %2, lsr #16 @ AAAA\n" \ 198 "mov %|r2, %3, lsr #16 @ BBBB\n" \ 199 "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ 200 "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ 201 
"mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ 202 "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ 203 "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ 204 "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ 205 "adds %|r0, %1, %0 @ central sum\n" \ 206 "addcs %|r2, %|r2, #65536\n" \ 207 "adds %1, %|r1, %|r0, lsl #16\n" \ 208 "adc %0, %|r2, %|r0, lsr #16" \ 209 : "=&r" ((USItype)(xh)), \ 210 "=r" ((USItype)(xl)) \ 211 : "r" ((USItype)(a)), \ 212 "r" ((USItype)(b)) \ 213 : "r0", "r1", "r2") 214 #else 215 #define umul_ppmm(xh, xl, a, b) \ 216 __asm__ ("%@ Inlined umul_ppmm\n" \ 217 "umull %r1, %r0, %r2, %r3" \ 218 : "=&r" ((USItype)(xh)), \ 219 "=r" ((USItype)(xl)) \ 220 : "r" ((USItype)(a)), \ 221 "r" ((USItype)(b)) \ 222 : "r0", "r1") 223 #endif 224 #define UMUL_TIME 20 225 #define UDIV_TIME 100 226 #endif /* __arm__ */ 227 228 /*************************************** 229 ************** CLIPPER ************** 230 ***************************************/ 231 #if defined(__clipper__) && W_TYPE_SIZE == 32 232 #define umul_ppmm(w1, w0, u, v) \ 233 ({union {UDItype __ll; \ 234 struct {USItype __l, __h; } __i; \ 235 } __xx; \ 236 __asm__ ("mulwux %2,%0" \ 237 : "=r" (__xx.__ll) \ 238 : "%0" ((USItype)(u)), \ 239 "r" ((USItype)(v))); \ 240 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 241 #define smul_ppmm(w1, w0, u, v) \ 242 ({union {DItype __ll; \ 243 struct {SItype __l, __h; } __i; \ 244 } __xx; \ 245 __asm__ ("mulwx %2,%0" \ 246 : "=r" (__xx.__ll) \ 247 : "%0" ((SItype)(u)), \ 248 "r" ((SItype)(v))); \ 249 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 250 #define __umulsidi3(u, v) \ 251 ({UDItype __w; \ 252 __asm__ ("mulwux %2,%0" \ 253 : "=r" (__w) \ 254 : "%0" ((USItype)(u)), \ 255 "r" ((USItype)(v))); \ 256 __w; }) 257 #endif /* __clipper__ */ 258 259 /*************************************** 260 ************** GMICRO *************** 261 ***************************************/ 262 #if defined(__gmicro__) && W_TYPE_SIZE == 32 263 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 264 __asm__ ("add.w %5,%1\n" \ 
265 "addx %3,%0" \ 266 : "=g" ((USItype)(sh)), \ 267 "=&g" ((USItype)(sl)) \ 268 : "%0" ((USItype)(ah)), \ 269 "g" ((USItype)(bh)), \ 270 "%1" ((USItype)(al)), \ 271 "g" ((USItype)(bl))) 272 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 273 __asm__ ("sub.w %5,%1\n" \ 274 "subx %3,%0" \ 275 : "=g" ((USItype)(sh)), \ 276 "=&g" ((USItype)(sl)) \ 277 : "0" ((USItype)(ah)), \ 278 "g" ((USItype)(bh)), \ 279 "1" ((USItype)(al)), \ 280 "g" ((USItype)(bl))) 281 #define umul_ppmm(ph, pl, m0, m1) \ 282 __asm__ ("mulx %3,%0,%1" \ 283 : "=g" ((USItype)(ph)), \ 284 "=r" ((USItype)(pl)) \ 285 : "%0" ((USItype)(m0)), \ 286 "g" ((USItype)(m1))) 287 #define udiv_qrnnd(q, r, nh, nl, d) \ 288 __asm__ ("divx %4,%0,%1" \ 289 : "=g" ((USItype)(q)), \ 290 "=r" ((USItype)(r)) \ 291 : "1" ((USItype)(nh)), \ 292 "0" ((USItype)(nl)), \ 293 "g" ((USItype)(d))) 294 #endif 295 296 /*************************************** 297 ************** HPPA ***************** 298 ***************************************/ 299 #if defined(__hppa) && W_TYPE_SIZE == 32 300 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 301 __asm__ ("add %4,%5,%1\n" \ 302 "addc %2,%3,%0" \ 303 : "=r" ((USItype)(sh)), \ 304 "=&r" ((USItype)(sl)) \ 305 : "%rM" ((USItype)(ah)), \ 306 "rM" ((USItype)(bh)), \ 307 "%rM" ((USItype)(al)), \ 308 "rM" ((USItype)(bl))) 309 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 310 __asm__ ("sub %4,%5,%1\n" \ 311 "subb %2,%3,%0" \ 312 : "=r" ((USItype)(sh)), \ 313 "=&r" ((USItype)(sl)) \ 314 : "rM" ((USItype)(ah)), \ 315 "rM" ((USItype)(bh)), \ 316 "rM" ((USItype)(al)), \ 317 "rM" ((USItype)(bl))) 318 #if 0 && defined(_PA_RISC1_1) 319 /* xmpyu uses floating point register which is not allowed in Linux kernel. 
*/ 320 #define umul_ppmm(wh, wl, u, v) \ 321 do { \ 322 union {UDItype __ll; \ 323 struct {USItype __h, __l; } __i; \ 324 } __xx; \ 325 __asm__ ("xmpyu %1,%2,%0" \ 326 : "=*f" (__xx.__ll) \ 327 : "*f" ((USItype)(u)), \ 328 "*f" ((USItype)(v))); \ 329 (wh) = __xx.__i.__h; \ 330 (wl) = __xx.__i.__l; \ 331 } while (0) 332 #define UMUL_TIME 8 333 #define UDIV_TIME 60 334 #else 335 #define UMUL_TIME 40 336 #define UDIV_TIME 80 337 #endif 338 #if 0 /* #ifndef LONGLONG_STANDALONE */ 339 #define udiv_qrnnd(q, r, n1, n0, d) \ 340 do { USItype __r; \ 341 (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ 342 (r) = __r; \ 343 } while (0) 344 extern USItype __udiv_qrnnd(); 345 #endif /* LONGLONG_STANDALONE */ 346 #endif /* hppa */ 347 348 /*************************************** 349 ************** I370 ***************** 350 ***************************************/ 351 #if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 352 #define umul_ppmm(xh, xl, m0, m1) \ 353 do { \ 354 union {UDItype __ll; \ 355 struct {USItype __h, __l; } __i; \ 356 } __xx; \ 357 USItype __m0 = (m0), __m1 = (m1); \ 358 __asm__ ("mr %0,%3" \ 359 : "=r" (__xx.__i.__h), \ 360 "=r" (__xx.__i.__l) \ 361 : "%1" (__m0), \ 362 "r" (__m1)); \ 363 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 364 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 365 + (((SItype) __m1 >> 31) & __m0)); \ 366 } while (0) 367 #define smul_ppmm(xh, xl, m0, m1) \ 368 do { \ 369 union {DItype __ll; \ 370 struct {USItype __h, __l; } __i; \ 371 } __xx; \ 372 __asm__ ("mr %0,%3" \ 373 : "=r" (__xx.__i.__h), \ 374 "=r" (__xx.__i.__l) \ 375 : "%1" (m0), \ 376 "r" (m1)); \ 377 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 378 } while (0) 379 #define sdiv_qrnnd(q, r, n1, n0, d) \ 380 do { \ 381 union {DItype __ll; \ 382 struct {USItype __h, __l; } __i; \ 383 } __xx; \ 384 __xx.__i.__h = n1; __xx.__i.__l = n0; \ 385 __asm__ ("dr %0,%2" \ 386 : "=r" (__xx.__ll) \ 387 : "0" (__xx.__ll), "r" (d)); \ 388 (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 389 } 
while (0) 390 #endif 391 392 /*************************************** 393 ************** I386 ***************** 394 ***************************************/ 395 #undef __i386__ 396 #if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 397 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 398 __asm__ ("addl %5,%1\n" \ 399 "adcl %3,%0" \ 400 : "=r" ((USItype)(sh)), \ 401 "=&r" ((USItype)(sl)) \ 402 : "%0" ((USItype)(ah)), \ 403 "g" ((USItype)(bh)), \ 404 "%1" ((USItype)(al)), \ 405 "g" ((USItype)(bl))) 406 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 407 __asm__ ("subl %5,%1\n" \ 408 "sbbl %3,%0" \ 409 : "=r" ((USItype)(sh)), \ 410 "=&r" ((USItype)(sl)) \ 411 : "0" ((USItype)(ah)), \ 412 "g" ((USItype)(bh)), \ 413 "1" ((USItype)(al)), \ 414 "g" ((USItype)(bl))) 415 #define umul_ppmm(w1, w0, u, v) \ 416 __asm__ ("mull %3" \ 417 : "=a" ((USItype)(w0)), \ 418 "=d" ((USItype)(w1)) \ 419 : "%0" ((USItype)(u)), \ 420 "rm" ((USItype)(v))) 421 #define udiv_qrnnd(q, r, n1, n0, d) \ 422 __asm__ ("divl %4" \ 423 : "=a" ((USItype)(q)), \ 424 "=d" ((USItype)(r)) \ 425 : "0" ((USItype)(n0)), \ 426 "1" ((USItype)(n1)), \ 427 "rm" ((USItype)(d))) 428 #ifndef UMUL_TIME 429 #define UMUL_TIME 40 430 #endif 431 #ifndef UDIV_TIME 432 #define UDIV_TIME 40 433 #endif 434 #endif /* 80x86 */ 435 436 /*************************************** 437 ************** I860 ***************** 438 ***************************************/ 439 #if defined(__i860__) && W_TYPE_SIZE == 32 440 #define rshift_rhlc(r, h, l, c) \ 441 __asm__ ("shr %3,r0,r0\n" \ 442 "shrd %1,%2,%0" \ 443 "=r" (r) : "r" (h), "r" (l), "rn" (c)) 444 #endif /* i860 */ 445 446 /*************************************** 447 ************** I960 ***************** 448 ***************************************/ 449 #if defined(__i960__) && W_TYPE_SIZE == 32 450 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 451 __asm__ ("cmpo 1,0\n" \ 452 "addc %5,%4,%1\n" \ 453 "addc %3,%2,%0" \ 454 : "=r" ((USItype)(sh)), \ 455 "=&r" ((USItype)(sl)) \ 
456 : "%dI" ((USItype)(ah)), \ 457 "dI" ((USItype)(bh)), \ 458 "%dI" ((USItype)(al)), \ 459 "dI" ((USItype)(bl))) 460 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 461 __asm__ ("cmpo 0,0\n" \ 462 "subc %5,%4,%1\n" \ 463 "subc %3,%2,%0" \ 464 : "=r" ((USItype)(sh)), \ 465 "=&r" ((USItype)(sl)) \ 466 : "dI" ((USItype)(ah)), \ 467 "dI" ((USItype)(bh)), \ 468 "dI" ((USItype)(al)), \ 469 "dI" ((USItype)(bl))) 470 #define umul_ppmm(w1, w0, u, v) \ 471 ({union {UDItype __ll; \ 472 struct {USItype __l, __h; } __i; \ 473 } __xx; \ 474 __asm__ ("emul %2,%1,%0" \ 475 : "=d" (__xx.__ll) \ 476 : "%dI" ((USItype)(u)), \ 477 "dI" ((USItype)(v))); \ 478 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 479 #define __umulsidi3(u, v) \ 480 ({UDItype __w; \ 481 __asm__ ("emul %2,%1,%0" \ 482 : "=d" (__w) \ 483 : "%dI" ((USItype)(u)), \ 484 "dI" ((USItype)(v))); \ 485 __w; }) 486 #define udiv_qrnnd(q, r, nh, nl, d) \ 487 do { \ 488 union {UDItype __ll; \ 489 struct {USItype __l, __h; } __i; \ 490 } __nn; \ 491 __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ 492 __asm__ ("ediv %d,%n,%0" \ 493 : "=d" (__rq.__ll) \ 494 : "dI" (__nn.__ll), \ 495 "dI" ((USItype)(d))); \ 496 (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ 497 } while (0) 498 #if defined(__i960mx) /* what is the proper symbol to test??? 
*/ 499 #define rshift_rhlc(r, h, l, c) \ 500 do { \ 501 union {UDItype __ll; \ 502 struct {USItype __l, __h; } __i; \ 503 } __nn; \ 504 __nn.__i.__h = (h); __nn.__i.__l = (l); \ 505 __asm__ ("shre %2,%1,%0" \ 506 : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ 507 } 508 #endif /* i960mx */ 509 #endif /* i960 */ 510 511 /*************************************** 512 ************** 68000 **************** 513 ***************************************/ 514 #if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 515 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 516 __asm__ ("add%.l %5,%1\n" \ 517 "addx%.l %3,%0" \ 518 : "=d" ((USItype)(sh)), \ 519 "=&d" ((USItype)(sl)) \ 520 : "%0" ((USItype)(ah)), \ 521 "d" ((USItype)(bh)), \ 522 "%1" ((USItype)(al)), \ 523 "g" ((USItype)(bl))) 524 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 525 __asm__ ("sub%.l %5,%1\n" \ 526 "subx%.l %3,%0" \ 527 : "=d" ((USItype)(sh)), \ 528 "=&d" ((USItype)(sl)) \ 529 : "0" ((USItype)(ah)), \ 530 "d" ((USItype)(bh)), \ 531 "1" ((USItype)(al)), \ 532 "g" ((USItype)(bl))) 533 #if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) 534 #define umul_ppmm(w1, w0, u, v) \ 535 __asm__ ("mulu%.l %3,%1:%0" \ 536 : "=d" ((USItype)(w0)), \ 537 "=d" ((USItype)(w1)) \ 538 : "%0" ((USItype)(u)), \ 539 "dmi" ((USItype)(v))) 540 #define UMUL_TIME 45 541 #define udiv_qrnnd(q, r, n1, n0, d) \ 542 __asm__ ("divu%.l %4,%1:%0" \ 543 : "=d" ((USItype)(q)), \ 544 "=d" ((USItype)(r)) \ 545 : "0" ((USItype)(n0)), \ 546 "1" ((USItype)(n1)), \ 547 "dmi" ((USItype)(d))) 548 #define UDIV_TIME 90 549 #define sdiv_qrnnd(q, r, n1, n0, d) \ 550 __asm__ ("divs%.l %4,%1:%0" \ 551 : "=d" ((USItype)(q)), \ 552 "=d" ((USItype)(r)) \ 553 : "0" ((USItype)(n0)), \ 554 "1" ((USItype)(n1)), \ 555 "dmi" ((USItype)(d))) 556 #else /* not mc68020 */ 557 #define umul_ppmm(xh, xl, a, b) \ 558 do { USItype __umul_tmp1, __umul_tmp2; \ 559 __asm__ ("| Inlined umul_ppmm\n" \ 560 "move%.l 
%5,%3\n" \ 561 "move%.l %2,%0\n" \ 562 "move%.w %3,%1\n" \ 563 "swap %3\n" \ 564 "swap %0\n" \ 565 "mulu %2,%1\n" \ 566 "mulu %3,%0\n" \ 567 "mulu %2,%3\n" \ 568 "swap %2\n" \ 569 "mulu %5,%2\n" \ 570 "add%.l %3,%2\n" \ 571 "jcc 1f\n" \ 572 "add%.l %#0x10000,%0\n" \ 573 "1: move%.l %2,%3\n" \ 574 "clr%.w %2\n" \ 575 "swap %2\n" \ 576 "swap %3\n" \ 577 "clr%.w %3\n" \ 578 "add%.l %3,%1\n" \ 579 "addx%.l %2,%0\n" \ 580 "| End inlined umul_ppmm" \ 581 : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 582 "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 583 : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 584 } while (0) 585 #define UMUL_TIME 100 586 #define UDIV_TIME 400 587 #endif /* not mc68020 */ 588 #endif /* mc68000 */ 589 590 /*************************************** 591 ************** 88000 **************** 592 ***************************************/ 593 #if defined(__m88000__) && W_TYPE_SIZE == 32 594 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 595 __asm__ ("addu.co %1,%r4,%r5\n" \ 596 "addu.ci %0,%r2,%r3" \ 597 : "=r" ((USItype)(sh)), \ 598 "=&r" ((USItype)(sl)) \ 599 : "%rJ" ((USItype)(ah)), \ 600 "rJ" ((USItype)(bh)), \ 601 "%rJ" ((USItype)(al)), \ 602 "rJ" ((USItype)(bl))) 603 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 604 __asm__ ("subu.co %1,%r4,%r5\n" \ 605 "subu.ci %0,%r2,%r3" \ 606 : "=r" ((USItype)(sh)), \ 607 "=&r" ((USItype)(sl)) \ 608 : "rJ" ((USItype)(ah)), \ 609 "rJ" ((USItype)(bh)), \ 610 "rJ" ((USItype)(al)), \ 611 "rJ" ((USItype)(bl))) 612 #if defined(__m88110__) 613 #define umul_ppmm(wh, wl, u, v) \ 614 do { \ 615 union {UDItype __ll; \ 616 struct {USItype __h, __l; } __i; \ 617 } __x; \ 618 __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ 619 (wh) = __x.__i.__h; \ 620 (wl) = __x.__i.__l; \ 621 } while (0) 622 #define udiv_qrnnd(q, r, n1, n0, d) \ 623 ({union {UDItype __ll; \ 624 struct {USItype __h, __l; } __i; \ 625 } __x, __q; \ 626 __x.__i.__h = (n1); __x.__i.__l = (n0); \ 627 __asm__ ("divu.d %0,%1,%2" \ 628 : "=r" 
(__q.__ll) : "r" (__x.__ll), "r" (d)); \ 629 (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) 630 #define UMUL_TIME 5 631 #define UDIV_TIME 25 632 #else 633 #define UMUL_TIME 17 634 #define UDIV_TIME 150 635 #endif /* __m88110__ */ 636 #endif /* __m88000__ */ 637 638 /*************************************** 639 ************** MIPS ***************** 640 ***************************************/ 641 #if defined(__mips__) && W_TYPE_SIZE == 32 642 #if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) 643 #define umul_ppmm(w1, w0, u, v) \ 644 do { \ 645 UDItype __ll = (UDItype)(u) * (v); \ 646 w1 = __ll >> 32; \ 647 w0 = __ll; \ 648 } while (0) 649 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 650 #define umul_ppmm(w1, w0, u, v) \ 651 __asm__ ("multu %2,%3" \ 652 : "=l" ((USItype)(w0)), \ 653 "=h" ((USItype)(w1)) \ 654 : "d" ((USItype)(u)), \ 655 "d" ((USItype)(v))) 656 #else 657 #define umul_ppmm(w1, w0, u, v) \ 658 __asm__ ("multu %2,%3\n" \ 659 "mflo %0\n" \ 660 "mfhi %1" \ 661 : "=d" ((USItype)(w0)), \ 662 "=d" ((USItype)(w1)) \ 663 : "d" ((USItype)(u)), \ 664 "d" ((USItype)(v))) 665 #endif 666 #define UMUL_TIME 10 667 #define UDIV_TIME 100 668 #endif /* __mips__ */ 669 670 /*************************************** 671 ************** MIPS/64 ************** 672 ***************************************/ 673 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 674 #if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) 675 #define umul_ppmm(w1, w0, u, v) \ 676 do { \ 677 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ 678 __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ 679 w1 = __ll >> 64; \ 680 w0 = __ll; \ 681 } while (0) 682 #elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 683 #define umul_ppmm(w1, w0, u, v) \ 684 __asm__ ("dmultu %2,%3" \ 685 : "=l" ((UDItype)(w0)), \ 686 "=h" ((UDItype)(w1)) \ 687 : "d" ((UDItype)(u)), \ 688 "d" ((UDItype)(v))) 689 #else 690 #define umul_ppmm(w1, w0, u, v) \ 691 __asm__ ("dmultu %2,%3\n" \ 692 "mflo %0\n" \ 693 
"mfhi %1" \ 694 : "=d" ((UDItype)(w0)), \ 695 "=d" ((UDItype)(w1)) \ 696 : "d" ((UDItype)(u)), \ 697 "d" ((UDItype)(v))) 698 #endif 699 #define UMUL_TIME 20 700 #define UDIV_TIME 140 701 #endif /* __mips__ */ 702 703 /*************************************** 704 ************** 32000 **************** 705 ***************************************/ 706 #if defined(__ns32000__) && W_TYPE_SIZE == 32 707 #define umul_ppmm(w1, w0, u, v) \ 708 ({union {UDItype __ll; \ 709 struct {USItype __l, __h; } __i; \ 710 } __xx; \ 711 __asm__ ("meid %2,%0" \ 712 : "=g" (__xx.__ll) \ 713 : "%0" ((USItype)(u)), \ 714 "g" ((USItype)(v))); \ 715 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 716 #define __umulsidi3(u, v) \ 717 ({UDItype __w; \ 718 __asm__ ("meid %2,%0" \ 719 : "=g" (__w) \ 720 : "%0" ((USItype)(u)), \ 721 "g" ((USItype)(v))); \ 722 __w; }) 723 #define udiv_qrnnd(q, r, n1, n0, d) \ 724 ({union {UDItype __ll; \ 725 struct {USItype __l, __h; } __i; \ 726 } __xx; \ 727 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 728 __asm__ ("deid %2,%0" \ 729 : "=g" (__xx.__ll) \ 730 : "0" (__xx.__ll), \ 731 "g" ((USItype)(d))); \ 732 (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 733 #endif /* __ns32000__ */ 734 735 /*************************************** 736 ************** PPC ****************** 737 ***************************************/ 738 #if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 739 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 740 do { \ 741 if (__builtin_constant_p(bh) && (bh) == 0) \ 742 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 743 : "=r" ((USItype)(sh)), \ 744 "=&r" ((USItype)(sl)) \ 745 : "%r" ((USItype)(ah)), \ 746 "%r" ((USItype)(al)), \ 747 "rI" ((USItype)(bl))); \ 748 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 749 __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 750 : "=r" ((USItype)(sh)), \ 751 "=&r" ((USItype)(sl)) \ 752 : "%r" ((USItype)(ah)), \ 753 "%r" ((USItype)(al)), \ 754 "rI" ((USItype)(bl))); \ 755 else \ 
756 __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 757 : "=r" ((USItype)(sh)), \ 758 "=&r" ((USItype)(sl)) \ 759 : "%r" ((USItype)(ah)), \ 760 "r" ((USItype)(bh)), \ 761 "%r" ((USItype)(al)), \ 762 "rI" ((USItype)(bl))); \ 763 } while (0) 764 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 765 do { \ 766 if (__builtin_constant_p(ah) && (ah) == 0) \ 767 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 768 : "=r" ((USItype)(sh)), \ 769 "=&r" ((USItype)(sl)) \ 770 : "r" ((USItype)(bh)), \ 771 "rI" ((USItype)(al)), \ 772 "r" ((USItype)(bl))); \ 773 else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ 774 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 775 : "=r" ((USItype)(sh)), \ 776 "=&r" ((USItype)(sl)) \ 777 : "r" ((USItype)(bh)), \ 778 "rI" ((USItype)(al)), \ 779 "r" ((USItype)(bl))); \ 780 else if (__builtin_constant_p(bh) && (bh) == 0) \ 781 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 782 : "=r" ((USItype)(sh)), \ 783 "=&r" ((USItype)(sl)) \ 784 : "r" ((USItype)(ah)), \ 785 "rI" ((USItype)(al)), \ 786 "r" ((USItype)(bl))); \ 787 else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ 788 __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 789 : "=r" ((USItype)(sh)), \ 790 "=&r" ((USItype)(sl)) \ 791 : "r" ((USItype)(ah)), \ 792 "rI" ((USItype)(al)), \ 793 "r" ((USItype)(bl))); \ 794 else \ 795 __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 796 : "=r" ((USItype)(sh)), \ 797 "=&r" ((USItype)(sl)) \ 798 : "r" ((USItype)(ah)), \ 799 "r" ((USItype)(bh)), \ 800 "rI" ((USItype)(al)), \ 801 "r" ((USItype)(bl))); \ 802 } while (0) 803 #if defined(_ARCH_PPC) 804 #define umul_ppmm(ph, pl, m0, m1) \ 805 do { \ 806 USItype __m0 = (m0), __m1 = (m1); \ 807 __asm__ ("mulhwu %0,%1,%2" \ 808 : "=r" ((USItype) ph) \ 809 : "%r" (__m0), \ 810 "r" (__m1)); \ 811 (pl) = __m0 * __m1; \ 812 } while (0) 813 #define UMUL_TIME 15 814 #define smul_ppmm(ph, pl, m0, m1) \ 815 do { \ 816 SItype __m0 = 
(m0), __m1 = (m1); \ 817 __asm__ ("mulhw %0,%1,%2" \ 818 : "=r" ((SItype) ph) \ 819 : "%r" (__m0), \ 820 "r" (__m1)); \ 821 (pl) = __m0 * __m1; \ 822 } while (0) 823 #define SMUL_TIME 14 824 #define UDIV_TIME 120 825 #else 826 #define umul_ppmm(xh, xl, m0, m1) \ 827 do { \ 828 USItype __m0 = (m0), __m1 = (m1); \ 829 __asm__ ("mul %0,%2,%3" \ 830 : "=r" ((USItype)(xh)), \ 831 "=q" ((USItype)(xl)) \ 832 : "r" (__m0), \ 833 "r" (__m1)); \ 834 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 835 + (((SItype) __m1 >> 31) & __m0)); \ 836 } while (0) 837 #define UMUL_TIME 8 838 #define smul_ppmm(xh, xl, m0, m1) \ 839 __asm__ ("mul %0,%2,%3" \ 840 : "=r" ((SItype)(xh)), \ 841 "=q" ((SItype)(xl)) \ 842 : "r" (m0), \ 843 "r" (m1)) 844 #define SMUL_TIME 4 845 #define sdiv_qrnnd(q, r, nh, nl, d) \ 846 __asm__ ("div %0,%2,%4" \ 847 : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 848 : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 849 #define UDIV_TIME 100 850 #endif 851 #endif /* Power architecture variants. */ 852 853 /*************************************** 854 ************** PYR ****************** 855 ***************************************/ 856 #if defined(__pyr__) && W_TYPE_SIZE == 32 857 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 858 __asm__ ("addw %5,%1\n" \ 859 "addwc %3,%0" \ 860 : "=r" ((USItype)(sh)), \ 861 "=&r" ((USItype)(sl)) \ 862 : "%0" ((USItype)(ah)), \ 863 "g" ((USItype)(bh)), \ 864 "%1" ((USItype)(al)), \ 865 "g" ((USItype)(bl))) 866 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 867 __asm__ ("subw %5,%1\n" \ 868 "subwb %3,%0" \ 869 : "=r" ((USItype)(sh)), \ 870 "=&r" ((USItype)(sl)) \ 871 : "0" ((USItype)(ah)), \ 872 "g" ((USItype)(bh)), \ 873 "1" ((USItype)(al)), \ 874 "g" ((USItype)(bl))) 875 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. 
*/ 876 #define umul_ppmm(w1, w0, u, v) \ 877 ({union {UDItype __ll; \ 878 struct {USItype __h, __l; } __i; \ 879 } __xx; \ 880 __asm__ ("movw %1,%R0\n" \ 881 "uemul %2,%0" \ 882 : "=&r" (__xx.__ll) \ 883 : "g" ((USItype) (u)), \ 884 "g" ((USItype)(v))); \ 885 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) 886 #endif /* __pyr__ */ 887 888 /*************************************** 889 ************** RT/ROMP ************** 890 ***************************************/ 891 #if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 892 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 893 __asm__ ("a %1,%5\n" \ 894 "ae %0,%3" \ 895 : "=r" ((USItype)(sh)), \ 896 "=&r" ((USItype)(sl)) \ 897 : "%0" ((USItype)(ah)), \ 898 "r" ((USItype)(bh)), \ 899 "%1" ((USItype)(al)), \ 900 "r" ((USItype)(bl))) 901 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 902 __asm__ ("s %1,%5\n" \ 903 "se %0,%3" \ 904 : "=r" ((USItype)(sh)), \ 905 "=&r" ((USItype)(sl)) \ 906 : "0" ((USItype)(ah)), \ 907 "r" ((USItype)(bh)), \ 908 "1" ((USItype)(al)), \ 909 "r" ((USItype)(bl))) 910 #define umul_ppmm(ph, pl, m0, m1) \ 911 do { \ 912 USItype __m0 = (m0), __m1 = (m1); \ 913 __asm__ ( \ 914 "s r2,r2\n" \ 915 "mts r10,%2\n" \ 916 "m r2,%3\n" \ 917 "m r2,%3\n" \ 918 "m r2,%3\n" \ 919 "m r2,%3\n" \ 920 "m r2,%3\n" \ 921 "m r2,%3\n" \ 922 "m r2,%3\n" \ 923 "m r2,%3\n" \ 924 "m r2,%3\n" \ 925 "m r2,%3\n" \ 926 "m r2,%3\n" \ 927 "m r2,%3\n" \ 928 "m r2,%3\n" \ 929 "m r2,%3\n" \ 930 "m r2,%3\n" \ 931 "m r2,%3\n" \ 932 "cas %0,r2,r0\n" \ 933 "mfs r10,%1" \ 934 : "=r" ((USItype)(ph)), \ 935 "=r" ((USItype)(pl)) \ 936 : "%r" (__m0), \ 937 "r" (__m1) \ 938 : "r2"); \ 939 (ph) += ((((SItype) __m0 >> 31) & __m1) \ 940 + (((SItype) __m1 >> 31) & __m0)); \ 941 } while (0) 942 #define UMUL_TIME 20 943 #define UDIV_TIME 200 944 #endif /* RT/ROMP */ 945 946 /*************************************** 947 ************** SH2 ****************** 948 ***************************************/ 949 #if (defined(__sh2__) || defined(__sh3__) || 
defined(__SH4__)) \ 950 && W_TYPE_SIZE == 32 951 #define umul_ppmm(w1, w0, u, v) \ 952 __asm__ ( \ 953 "dmulu.l %2,%3\n" \ 954 "sts macl,%1\n" \ 955 "sts mach,%0" \ 956 : "=r" ((USItype)(w1)), \ 957 "=r" ((USItype)(w0)) \ 958 : "r" ((USItype)(u)), \ 959 "r" ((USItype)(v)) \ 960 : "macl", "mach") 961 #define UMUL_TIME 5 962 #endif 963 964 /*************************************** 965 ************** SPARC **************** 966 ***************************************/ 967 #if defined(__sparc__) && W_TYPE_SIZE == 32 968 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 969 __asm__ ("addcc %r4,%5,%1\n" \ 970 "addx %r2,%3,%0" \ 971 : "=r" ((USItype)(sh)), \ 972 "=&r" ((USItype)(sl)) \ 973 : "%rJ" ((USItype)(ah)), \ 974 "rI" ((USItype)(bh)), \ 975 "%rJ" ((USItype)(al)), \ 976 "rI" ((USItype)(bl)) \ 977 __CLOBBER_CC) 978 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 979 __asm__ ("subcc %r4,%5,%1\n" \ 980 "subx %r2,%3,%0" \ 981 : "=r" ((USItype)(sh)), \ 982 "=&r" ((USItype)(sl)) \ 983 : "rJ" ((USItype)(ah)), \ 984 "rI" ((USItype)(bh)), \ 985 "rJ" ((USItype)(al)), \ 986 "rI" ((USItype)(bl)) \ 987 __CLOBBER_CC) 988 #if defined(__sparc_v8__) 989 /* Don't match immediate range because, 1) it is not often useful, 990 2) the 'I' flag thinks of the range as a 13 bit signed interval, 991 while we want to match a 13 bit interval, sign extended to 32 bits, 992 but INTERPRETED AS UNSIGNED. 
*/ 993 #define umul_ppmm(w1, w0, u, v) \ 994 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 995 : "=r" ((USItype)(w1)), \ 996 "=r" ((USItype)(w0)) \ 997 : "r" ((USItype)(u)), \ 998 "r" ((USItype)(v))) 999 #define UMUL_TIME 5 1000 #ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ 1001 #define udiv_qrnnd(q, r, n1, n0, d) \ 1002 do { \ 1003 USItype __q; \ 1004 __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ 1005 : "=r" ((USItype)(__q)) \ 1006 : "r" ((USItype)(n1)), \ 1007 "r" ((USItype)(n0)), \ 1008 "r" ((USItype)(d))); \ 1009 (r) = (n0) - __q * (d); \ 1010 (q) = __q; \ 1011 } while (0) 1012 #define UDIV_TIME 25 1013 #endif /* SUPERSPARC */ 1014 #else /* ! __sparc_v8__ */ 1015 #if defined(__sparclite__) 1016 /* This has hardware multiply but not divide. It also has two additional 1017 instructions scan (ffs from high bit) and divscc. */ 1018 #define umul_ppmm(w1, w0, u, v) \ 1019 __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 1020 : "=r" ((USItype)(w1)), \ 1021 "=r" ((USItype)(w0)) \ 1022 : "r" ((USItype)(u)), \ 1023 "r" ((USItype)(v))) 1024 #define UMUL_TIME 5 1025 #define udiv_qrnnd(q, r, n1, n0, d) \ 1026 __asm__ ("! Inlined udiv_qrnnd\n" \ 1027 "wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ 1028 "tst %%g0\n" \ 1029 "divscc %3,%4,%%g1\n" \ 1030 "divscc %%g1,%4,%%g1\n" \ 1031 "divscc %%g1,%4,%%g1\n" \ 1032 "divscc %%g1,%4,%%g1\n" \ 1033 "divscc %%g1,%4,%%g1\n" \ 1034 "divscc %%g1,%4,%%g1\n" \ 1035 "divscc %%g1,%4,%%g1\n" \ 1036 "divscc %%g1,%4,%%g1\n" \ 1037 "divscc %%g1,%4,%%g1\n" \ 1038 "divscc %%g1,%4,%%g1\n" \ 1039 "divscc %%g1,%4,%%g1\n" \ 1040 "divscc %%g1,%4,%%g1\n" \ 1041 "divscc %%g1,%4,%%g1\n" \ 1042 "divscc %%g1,%4,%%g1\n" \ 1043 "divscc %%g1,%4,%%g1\n" \ 1044 "divscc %%g1,%4,%%g1\n" \ 1045 "divscc %%g1,%4,%%g1\n" \ 1046 "divscc %%g1,%4,%%g1\n" \ 1047 "divscc %%g1,%4,%%g1\n" \ 1048 "divscc %%g1,%4,%%g1\n" \ 1049 "divscc %%g1,%4,%%g1\n" \ 1050 "divscc %%g1,%4,%%g1\n" \ 1051 "divscc %%g1,%4,%%g1\n" \ 1052 "divscc %%g1,%4,%%g1\n" \ 1053 "divscc %%g1,%4,%%g1\n" \ 1054 "divscc %%g1,%4,%%g1\n" \ 1055 "divscc %%g1,%4,%%g1\n" \ 1056 "divscc %%g1,%4,%%g1\n" \ 1057 "divscc %%g1,%4,%%g1\n" \ 1058 "divscc %%g1,%4,%%g1\n" \ 1059 "divscc %%g1,%4,%%g1\n" \ 1060 "divscc %%g1,%4,%0\n" \ 1061 "rd %%y,%1\n" \ 1062 "bl,a 1f\n" \ 1063 "add %1,%4,%1\n" \ 1064 "1: ! End of inline udiv_qrnnd" \ 1065 : "=r" ((USItype)(q)), \ 1066 "=r" ((USItype)(r)) \ 1067 : "r" ((USItype)(n1)), \ 1068 "r" ((USItype)(n0)), \ 1069 "rI" ((USItype)(d)) \ 1070 : "%g1" __AND_CLOBBER_CC) 1071 #define UDIV_TIME 37 1072 #endif /* __sparclite__ */ 1073 #endif /* __sparc_v8__ */ 1074 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 1075 #ifndef umul_ppmm 1076 #define umul_ppmm(w1, w0, u, v) \ 1077 __asm__ ("! Inlined umul_ppmm\n" \ 1078 "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ 1079 "sra %3,31,%%g2 ! Don't move this insn\n" \ 1080 "and %2,%%g2,%%g2 ! Don't move this insn\n" \ 1081 "andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ 1082 "mulscc %%g1,%3,%%g1\n" \ 1083 "mulscc %%g1,%3,%%g1\n" \ 1084 "mulscc %%g1,%3,%%g1\n" \ 1085 "mulscc %%g1,%3,%%g1\n" \ 1086 "mulscc %%g1,%3,%%g1\n" \ 1087 "mulscc %%g1,%3,%%g1\n" \ 1088 "mulscc %%g1,%3,%%g1\n" \ 1089 "mulscc %%g1,%3,%%g1\n" \ 1090 "mulscc %%g1,%3,%%g1\n" \ 1091 "mulscc %%g1,%3,%%g1\n" \ 1092 "mulscc %%g1,%3,%%g1\n" \ 1093 "mulscc %%g1,%3,%%g1\n" \ 1094 "mulscc %%g1,%3,%%g1\n" \ 1095 "mulscc %%g1,%3,%%g1\n" \ 1096 "mulscc %%g1,%3,%%g1\n" \ 1097 "mulscc %%g1,%3,%%g1\n" \ 1098 "mulscc %%g1,%3,%%g1\n" \ 1099 "mulscc %%g1,%3,%%g1\n" \ 1100 "mulscc %%g1,%3,%%g1\n" \ 1101 "mulscc %%g1,%3,%%g1\n" \ 1102 "mulscc %%g1,%3,%%g1\n" \ 1103 "mulscc %%g1,%3,%%g1\n" \ 1104 "mulscc %%g1,%3,%%g1\n" \ 1105 "mulscc %%g1,%3,%%g1\n" \ 1106 "mulscc %%g1,%3,%%g1\n" \ 1107 "mulscc %%g1,%3,%%g1\n" \ 1108 "mulscc %%g1,%3,%%g1\n" \ 1109 "mulscc %%g1,%3,%%g1\n" \ 1110 "mulscc %%g1,%3,%%g1\n" \ 1111 "mulscc %%g1,%3,%%g1\n" \ 1112 "mulscc %%g1,%3,%%g1\n" \ 1113 "mulscc %%g1,%3,%%g1\n" \ 1114 "mulscc %%g1,0,%%g1\n" \ 1115 "add %%g1,%%g2,%0\n" \ 1116 "rd %%y,%1" \ 1117 : "=r" ((USItype)(w1)), \ 1118 "=r" ((USItype)(w0)) \ 1119 : "%rI" ((USItype)(u)), \ 1120 "r" ((USItype)(v)) \ 1121 : "%g1", "%g2" __AND_CLOBBER_CC) 1122 #define UMUL_TIME 39 /* 39 instructions */ 1123 /* It's quite necessary to add this much assembler for the sparc. 1124 The default udiv_qrnnd (in C) is more than 10 times slower! */ 1125 #define udiv_qrnnd(q, r, n1, n0, d) \ 1126 __asm__ ("! Inlined udiv_qrnnd\n\t" \ 1127 "mov 32,%%g1\n\t" \ 1128 "subcc %1,%2,%%g0\n\t" \ 1129 "1: bcs 5f\n\t" \ 1130 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1131 "sub %1,%2,%1 ! this kills msb of n\n\t" \ 1132 "addx %1,%1,%1 ! so this can't give carry\n\t" \ 1133 "subcc %%g1,1,%%g1\n\t" \ 1134 "2: bne 1b\n\t" \ 1135 "subcc %1,%2,%%g0\n\t" \ 1136 "bcs 3f\n\t" \ 1137 "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ 1138 "b 3f\n\t" \ 1139 "sub %1,%2,%1 ! 
this kills msb of n\n\t" \ 1140 "4: sub %1,%2,%1\n\t" \ 1141 "5: addxcc %1,%1,%1\n\t" \ 1142 "bcc 2b\n\t" \ 1143 "subcc %%g1,1,%%g1\n\t" \ 1144 "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ 1145 "bne 4b\n\t" \ 1146 "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ 1147 "sub %1,%2,%1\n\t" \ 1148 "3: xnor %0,0,%0\n\t" \ 1149 "! End of inline udiv_qrnnd\n" \ 1150 : "=&r" ((USItype)(q)), \ 1151 "=&r" ((USItype)(r)) \ 1152 : "r" ((USItype)(d)), \ 1153 "1" ((USItype)(n1)), \ 1154 "0" ((USItype)(n0)) : "%g1", "cc") 1155 #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ 1156 #endif 1157 #endif /* __sparc__ */ 1158 1159 /*************************************** 1160 ************** VAX ****************** 1161 ***************************************/ 1162 #if defined(__vax__) && W_TYPE_SIZE == 32 1163 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1164 __asm__ ("addl2 %5,%1\n" \ 1165 "adwc %3,%0" \ 1166 : "=g" ((USItype)(sh)), \ 1167 "=&g" ((USItype)(sl)) \ 1168 : "%0" ((USItype)(ah)), \ 1169 "g" ((USItype)(bh)), \ 1170 "%1" ((USItype)(al)), \ 1171 "g" ((USItype)(bl))) 1172 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1173 __asm__ ("subl2 %5,%1\n" \ 1174 "sbwc %3,%0" \ 1175 : "=g" ((USItype)(sh)), \ 1176 "=&g" ((USItype)(sl)) \ 1177 : "0" ((USItype)(ah)), \ 1178 "g" ((USItype)(bh)), \ 1179 "1" ((USItype)(al)), \ 1180 "g" ((USItype)(bl))) 1181 #define umul_ppmm(xh, xl, m0, m1) \ 1182 do { \ 1183 union {UDItype __ll; \ 1184 struct {USItype __l, __h; } __i; \ 1185 } __xx; \ 1186 USItype __m0 = (m0), __m1 = (m1); \ 1187 __asm__ ("emul %1,%2,$0,%0" \ 1188 : "=g" (__xx.__ll) \ 1189 : "g" (__m0), \ 1190 "g" (__m1)); \ 1191 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1192 (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1193 + (((SItype) __m1 >> 31) & __m0)); \ 1194 } while (0) 1195 #define sdiv_qrnnd(q, r, n1, n0, d) \ 1196 do { \ 1197 union {DItype __ll; \ 1198 struct {SItype __l, __h; } __i; \ 1199 } __xx; \ 1200 __xx.__i.__h = n1; 
__xx.__i.__l = n0; \ 1201 __asm__ ("ediv %3,%2,%0,%1" \ 1202 : "=g" (q), "=g" (r) \ 1203 : "g" (__xx.__ll), "g" (d)); \ 1204 } while (0) 1205 #endif /* __vax__ */ 1206 1207 /*************************************** 1208 ************** Z8000 **************** 1209 ***************************************/ 1210 #if defined(__z8000__) && W_TYPE_SIZE == 16 1211 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1212 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1213 : "=r" ((unsigned int)(sh)), \ 1214 "=&r" ((unsigned int)(sl)) \ 1215 : "%0" ((unsigned int)(ah)), \ 1216 "r" ((unsigned int)(bh)), \ 1217 "%1" ((unsigned int)(al)), \ 1218 "rQR" ((unsigned int)(bl))) 1219 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1220 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1221 : "=r" ((unsigned int)(sh)), \ 1222 "=&r" ((unsigned int)(sl)) \ 1223 : "0" ((unsigned int)(ah)), \ 1224 "r" ((unsigned int)(bh)), \ 1225 "1" ((unsigned int)(al)), \ 1226 "rQR" ((unsigned int)(bl))) 1227 #define umul_ppmm(xh, xl, m0, m1) \ 1228 do { \ 1229 union {long int __ll; \ 1230 struct {unsigned int __h, __l; } __i; \ 1231 } __xx; \ 1232 unsigned int __m0 = (m0), __m1 = (m1); \ 1233 __asm__ ("mult %S0,%H3" \ 1234 : "=r" (__xx.__i.__h), \ 1235 "=r" (__xx.__i.__l) \ 1236 : "%1" (__m0), \ 1237 "rQR" (__m1)); \ 1238 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1239 (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1240 + (((signed int) __m1 >> 15) & __m0)); \ 1241 } while (0) 1242 #endif /* __z8000__ */ 1243 1244 #endif /* __GNUC__ */ 1245 1246 /*************************************** 1247 *********** Generic Versions ******** 1248 ***************************************/ 1249 #if !defined(umul_ppmm) && defined(__umulsidi3) 1250 #define umul_ppmm(ph, pl, m0, m1) \ 1251 { \ 1252 UDWtype __ll = __umulsidi3(m0, m1); \ 1253 ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1254 pl = (UWtype) __ll; \ 1255 } 1256 #endif 1257 1258 #if !defined(__umulsidi3) 1259 #define __umulsidi3(u, v) \ 1260 ({UWtype __hi, __lo; \ 1261 umul_ppmm(__hi, __lo, u, 
v); \ 1262 ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1263 #endif 1264 1265 /* If this machine has no inline assembler, use C macros. */ 1266 1267 #if !defined(add_ssaaaa) 1268 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1269 do { \ 1270 UWtype __x; \ 1271 __x = (al) + (bl); \ 1272 (sh) = (ah) + (bh) + (__x < (al)); \ 1273 (sl) = __x; \ 1274 } while (0) 1275 #endif 1276 1277 #if !defined(sub_ddmmss) 1278 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1279 do { \ 1280 UWtype __x; \ 1281 __x = (al) - (bl); \ 1282 (sh) = (ah) - (bh) - (__x > (al)); \ 1283 (sl) = __x; \ 1284 } while (0) 1285 #endif 1286 1287 #if !defined(umul_ppmm) 1288 #define umul_ppmm(w1, w0, u, v) \ 1289 do { \ 1290 UWtype __x0, __x1, __x2, __x3; \ 1291 UHWtype __ul, __vl, __uh, __vh; \ 1292 UWtype __u = (u), __v = (v); \ 1293 \ 1294 __ul = __ll_lowpart(__u); \ 1295 __uh = __ll_highpart(__u); \ 1296 __vl = __ll_lowpart(__v); \ 1297 __vh = __ll_highpart(__v); \ 1298 \ 1299 __x0 = (UWtype) __ul * __vl; \ 1300 __x1 = (UWtype) __ul * __vh; \ 1301 __x2 = (UWtype) __uh * __vl; \ 1302 __x3 = (UWtype) __uh * __vh; \ 1303 \ 1304 __x1 += __ll_highpart(__x0);/* this can't give carry */ \ 1305 __x1 += __x2; /* but this indeed can */ \ 1306 if (__x1 < __x2) /* did we get it? */ \ 1307 __x3 += __ll_B; /* yes, add it in the proper pos. */ \ 1308 \ 1309 (w1) = __x3 + __ll_highpart(__x1); \ 1310 (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ 1311 } while (0) 1312 #endif 1313 1314 #if !defined(umul_ppmm) 1315 #define smul_ppmm(w1, w0, u, v) \ 1316 do { \ 1317 UWtype __w1; \ 1318 UWtype __m0 = (u), __m1 = (v); \ 1319 umul_ppmm(__w1, w0, __m0, __m1); \ 1320 (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1321 - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1322 } while (0) 1323 #endif 1324 1325 /* Define this unconditionally, so it can be used for debugging. 
*/
/*
 * Portable double-word by word division in two half-word steps.  Each step
 * estimates one half of the quotient by dividing by the high half of d
 * (__d1), then lowers the estimate at most twice using the low half (__d0).
 * The arguments are evaluated several times, so they must be free of side
 * effects.  The fallback further down pairs this routine with
 * UDIV_NEEDS_NORMALIZATION == 1.
 */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \
	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
	__d1 = __ll_highpart(d); \
	__d0 = __ll_lowpart(d); \
	\
	/* High half of the quotient. */ \
	__r1 = (n1) % __d1; \
	__q1 = (n1) / __d1; \
	__m = (UWtype) __q1 * __d0; \
	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
	if (__r1 < __m) { \
		__q1--; \
		__r1 += (d); \
		/* Second correction only if adding d did not carry. */ \
		if (__r1 >= (d) && __r1 < __m) { \
			__q1--; \
			__r1 += (d); \
		} \
	} \
	__r1 -= __m; \
	\
	/* Low half of the quotient, same scheme on the running remainder. */ \
	__r0 = __r1 % __d1; \
	__q0 = __r1 / __d1; \
	__m = (UWtype) __q0 * __d0; \
	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
	if (__r0 < __m) { \
		__q0--; \
		__r0 += (d); \
		if (__r0 >= (d) && __r0 < __m) { \
			__q0--; \
			__r0 += (d); \
		} \
	} \
	__r0 -= __m; \
	\
	(q) = (UWtype) __q1 * __ll_B | __q0; \
	(r) = __r0; \
} while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
	UWtype __r; \
	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
	(r) = __r; \
} while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined(udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Every other udiv_qrnnd accepts an unnormalized divisor. */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif