/*
 * The implementations contained in this file are heavily based on the
 * implementations found in the Berkeley SoftFloat library. As such, they are
 * licensed under the same 3-clause BSD license:
 *
 * License for Berkeley SoftFloat Release 3e
 *
 * John R. Hauser
 * 2018 January 20
 *
 * The following applies to the whole of SoftFloat Release 3e as well as to
 * each source file individually.
 *
 * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
 * University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions, and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#version 400
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_ARB_shader_bit_encoding : enable
#extension GL_EXT_shader_integer_mix : enable
#extension GL_MESA_shader_integer_functions : enable

#pragma warning(off)

/* Software IEEE floating-point rounding mode.
 * GLSL spec section "4.7.1 Range and Precision":
 * The rounding mode cannot be set and is undefined.
 * But here, we can define the rounding mode at compile time.
 */
#define FLOAT_ROUND_NEAREST_EVEN 0
#define FLOAT_ROUND_TO_ZERO 1
#define FLOAT_ROUND_DOWN 2
#define FLOAT_ROUND_UP 3
#define FLOAT_ROUNDING_MODE FLOAT_ROUND_NEAREST_EVEN

/* Relax propagation of NaN. Binary operations with a NaN source will still
 * produce a NaN result, but it won't follow strict IEEE rules.
 */
#define RELAXED_NAN_PROPAGATION

/* Absolute value of a Float64 :
 * Clear the sign bit
 */
uint64_t
__fabs64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   a.y &= 0x7FFFFFFFu;
   return packUint2x32(a);
}

/* Returns 1 if the double-precision floating-point value `a' is a NaN;
 * otherwise returns 0.
 */
bool
__is_nan(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   return (0xFFE00000u <= (a.y<<1)) &&
      ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u));
}
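
/* Throughout this file a double is carried as the uvec2 from unpackUint2x32():
 * .x holds the low 32 fraction bits and .y holds, from MSB to LSB, the sign
 * bit, the 11 exponent bits, and the top 20 fraction bits. In __is_nan above,
 * (a.y<<1) discards the sign, so the comparison against 0xFFE00000u is true
 * exactly when the exponent field is all ones (0x7FF); the second term then
 * requires a nonzero fraction, which distinguishes a NaN from an infinity.
 */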

/* Negate value of a Float64 :
 * Toggle the sign bit
 */
uint64_t
__fneg64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   a.y ^= (1u << 31);
   return packUint2x32(a);
}

uint64_t
__fsign64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 retval;
   retval.x = 0u;
   retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u);
   return packUint2x32(retval);
}

/* Returns the fraction bits of the double-precision floating-point value `a'.*/
uint
__extractFloat64FracLo(uint64_t a)
{
   return unpackUint2x32(a).x;
}

uint
__extractFloat64FracHi(uint64_t a)
{
   return unpackUint2x32(a).y & 0x000FFFFFu;
}

/* Returns the exponent bits of the double-precision floating-point value `a'.*/
int
__extractFloat64Exp(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   return int((a.y>>20) & 0x7FFu);
}

bool
__feq64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);
   return (a.x == b.x) &&
          ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u)));
}
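
/* Note that the second clause above makes +0.0 and -0.0 compare equal: for
 * a == 0x0000000000000000ul and b == 0x8000000000000000ul the low words and
 * fractions match, and ((a.y | b.y)<<1) == 0u discards the only differing
 * bit, the sign.
 */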

/* Returns true if the double-precision floating-point value `a' is equal to the
 * corresponding value `b', and false otherwise. The comparison is performed
 * according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__feq64(uint64_t a, uint64_t b)
{
   if (__is_nan(a) || __is_nan(b))
      return false;

   return __feq64_nonnan(a, b);
}

/* Returns true if the double-precision floating-point value `a' is not equal
 * to the corresponding value `b', and false otherwise. The comparison is
 * performed according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__fneu64(uint64_t a, uint64_t b)
{
   if (__is_nan(a) || __is_nan(b))
      return true;

   return !__feq64_nonnan(a, b);
}

/* Returns the sign bit of the double-precision floating-point value `a'.*/
uint
__extractFloat64Sign(uint64_t a)
{
   return unpackUint2x32(a).y & 0x80000000u;
}

/* Returns true if the signed 64-bit value formed by concatenating `a0' and
 * `a1' is less than the signed 64-bit value formed by concatenating `b0' and
 * `b1'. Otherwise, returns false.
 */
bool
ilt64(uint a0, uint a1, uint b0, uint b1)
{
   return (int(a0) < int(b0)) || ((a0 == b0) && (a1 < b1));
}

bool
__flt64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   /* IEEE 754 floating point numbers are specifically designed so that, with
    * two exceptions, values can be compared by bit-casting to signed integers
    * with the same number of bits.
    *
    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
    *
    *    When comparing as 2's-complement integers: If the sign bits differ,
    *    the negative number precedes the positive number, so 2's complement
    *    gives the correct result (except that negative zero and positive zero
    *    should be considered equal). If both values are positive, the 2's
    *    complement comparison again gives the correct result. Otherwise (two
    *    negative numbers), the correct FP ordering is the opposite of the 2's
    *    complement ordering.
    *
    * The logic implied by the above quotation is:
    *
    *    !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
    *
    * This is equivalent to
    *
    *    fneu(a, b) && (both_negative(a, b) ? a >= b : a < b)
    *
    *    fneu(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
    *
    *    fneu(a, b) && ((both_negative(a, b) && !(a < b)) ||
    *                   (!both_negative(a, b) && (a < b)))
    *
    * (A && !B) || (!A && B) is (A xor B), which is implemented here using !=.
    *
    *    fneu(a, b) && (both_negative(a, b) != (a < b))
    */
   bool lt = ilt64(a.y, a.x, b.y, b.x);
   bool both_negative = (a.y & b.y & 0x80000000u) != 0;

   return !__feq64_nonnan(__a, __b) && (lt != both_negative);
}

/* Returns true if the double-precision floating-point value `a' is less than
 * the corresponding value `b', and false otherwise. The comparison is performed
 * according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__flt64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool x = !__is_nan(a);
   bool y = !__is_nan(b);
   bool z = __flt64_nonnan(a, b);

   return (x && y && z);
}

/* Returns true if the double-precision floating-point value `a' is greater
 * than or equal to the corresponding value `b', and false otherwise. The
 * comparison is performed according to the IEEE Standard for Floating-Point
 * Arithmetic.
 */
bool
__fge64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool x = !__is_nan(a);
   bool y = !__is_nan(b);
   bool z = !__flt64_nonnan(a, b);

   return (x && y && z);
}

uint64_t
__fsat64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);

   /* fsat(NaN) should be zero. */
   if (__is_nan(__a) || int(a.y) < 0)
      return 0ul;

   /* IEEE 754 floating point numbers are specifically designed so that, with
    * two exceptions, values can be compared by bit-casting to signed integers
    * with the same number of bits.
    *
    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
    *
    *    When comparing as 2's-complement integers: If the sign bits differ,
    *    the negative number precedes the positive number, so 2's complement
    *    gives the correct result (except that negative zero and positive zero
    *    should be considered equal). If both values are positive, the 2's
    *    complement comparison again gives the correct result. Otherwise (two
    *    negative numbers), the correct FP ordering is the opposite of the 2's
    *    complement ordering.
    *
    * We know that both values are not negative, and we know that at least one
    * value is not zero. Therefore, we can just use the 2's complement
    * comparison ordering.
    */
   if (ilt64(0x3FF00000, 0x00000000, a.y, a.x))
      return 0x3FF0000000000000ul;

   return __a;
}

/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
 * value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
 * any carry out is lost. The result is broken into two 32-bit pieces which
 * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__add64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   uint z1 = a1 + b1;
   z1Ptr = z1;
   z0Ptr = a0 + b0 + uint(z1 < a1);
}


/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
 * 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
 * 2^64, so any borrow out (carry out) is lost. The result is broken into two
 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 * `z1Ptr'.
 */
void
__sub64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   z1Ptr = a1 - b1;
   z0Ptr = a0 - b0 - uint(a1 < b1);
}

/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
 * number of bits given in `count'. If any nonzero bits are shifted off, they
 * are "jammed" into the least significant bit of the result by setting the
 * least significant bit to 1. The value of `count' can be arbitrarily large;
 * in particular, if `count' is greater than 64, the result will be either 0
 * or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 * nonzero. The result is broken into two 32-bit pieces which are stored at
 * the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shift64RightJamming(uint a0,
                      uint a1,
                      int count,
                      out uint z0Ptr,
                      out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   z0 = mix(0u, a0, count == 0);
   z0 = mix(z0, (a0 >> count), count < 32);

   z1 = uint((a0 | a1) != 0u); /* count >= 64 */
   uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);
   z1 = mix(z1, z1_lt64, count < 64);
   z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);
   uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint((a1<<negCount) != 0u);
   z1 = mix(z1, z1_lt32, count < 32);
   z1 = mix(z1, a1, count == 0);
   z1Ptr = z1;
   z0Ptr = z0;
}
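
/* A small worked example of the jamming behaviour: shifting the 64-bit value
 * 5 (binary ...101) right by 1 yields 2 with a nonzero bit shifted off, so
 * the jammed result is 3. The sticky low bit is what later lets the rounding
 * code distinguish "exactly halfway" from "slightly more than halfway".
 */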

/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
 * by 32 _plus_ the number of bits given in `count'. The shifted result is
 * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
 * stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
 * off form a third 32-bit result as follows: The _last_ bit shifted off is
 * the most-significant bit of the extra result, and the other 31 bits of the
 * extra result are all zero if and only if _all_but_the_last_ bits shifted off
 * were all zero. This extra result is stored in the location pointed to by
 * `z2Ptr'. The value of `count' can be arbitrarily large.
 * (This routine makes more sense if `a0', `a1', and `a2' are considered
 * to form a fixed-point value with binary point between `a1' and `a2'. This
 * fixed-point value is shifted right by the number of bits given in `count',
 * and the integer part of the result is returned at the locations pointed to
 * by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
 * corrupted as described above, and is returned at the location pointed to by
 * `z2Ptr'.)
 */
void
__shift64ExtraRightJamming(uint a0, uint a1, uint a2,
                           int count,
                           out uint z0Ptr,
                           out uint z1Ptr,
                           out uint z2Ptr)
{
   uint z0 = 0u;
   uint z1;
   uint z2;
   int negCount = (-count) & 31;

   z2 = mix(uint(a0 != 0u), a0, count == 64);
   z2 = mix(z2, a0 << negCount, count < 64);
   z2 = mix(z2, a1 << negCount, count < 32);

   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);

   a2 = mix(a2 | a1, a2, count < 32);
   z0 = mix(z0, a0 >> count, count < 32);
   z2 |= uint(a2 != 0u);

   z0 = mix(z0, 0u, (count == 32));
   z1 = mix(z1, a0, (count == 32));
   z2 = mix(z2, a1, (count == 32));
   z0 = mix(z0, a0, (count == 0));
   z1 = mix(z1, a1, (count == 0));
   z2 = mix(z2, a2, (count == 0));
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
 * number of bits given in `count'. Any bits shifted off are lost. The value
 * of `count' must be less than 32. The result is broken into two 32-bit
 * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shortShift64Left(uint a0, uint a1,
                   int count,
                   out uint z0Ptr,
                   out uint z1Ptr)
{
   z1Ptr = a1<<count;
   z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);
}

/* Packs the sign `zSign', the exponent `zExp', and the significand formed by
 * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-
 * point value, returning the result. After being shifted into the proper
 * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added
 * together to form the most significant 32 bits of the result. This means
 * that any integer portion of `zFrac0' will be added into the exponent. Since
 * a properly normalized significand will have an integer portion equal to 1,
 * the `zExp' input should be 1 less than the desired result exponent whenever
 * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.
 */
uint64_t
__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)
{
   uvec2 z;

   z.y = zSign + (uint(zExp) << 20) + zFrac0;
   z.x = zFrac1;
   return packUint2x32(z);
}
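
/* For instance, 1.0 is produced by __packFloat64(0u, 0x3FE, 0x00100000u, 0u):
 * the integer bit of the significand (bit 20 of zFrac0) carries into the
 * exponent field, giving the expected encoding 0x3FF0000000000000.
 */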

/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
 * and `zFrac2', and returns the proper double-precision floating-point value
 * corresponding to the abstract input. Ordinarily, the abstract value is
 * simply rounded and packed into the double-precision format, with the inexact
 * exception raised if the abstract input cannot be represented exactly.
 * However, if the abstract value is too large, the overflow and inexact
 * exceptions are raised and an infinity or maximal finite value is returned.
 * If the abstract value is too small, the input value is rounded to a
 * subnormal number, and the underflow and inexact exceptions are raised if the
 * abstract input cannot be represented exactly as a subnormal double-precision
 * floating-point number.
 * The input significand must be normalized or smaller. If the input
 * significand is not normalized, `zExp' must be 0; in that case, the result
 * returned is a subnormal number, and it must not require rounding. In the
 * usual case that the input significand is normalized, `zExp' must be 1 less
 * than the "true" floating-point exponent. The handling of underflow and
 * overflow follows the IEEE Standard for Floating-Point Arithmetic.
 */
uint64_t
__roundAndPackFloat64(uint zSign,
                      int zExp,
                      uint zFrac0,
                      uint zFrac1,
                      uint zFrac2)
{
   bool roundNearestEven;
   bool increment;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   increment = int(zFrac2) < 0;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         increment = false;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }
   if (0x7FD <= zExp) {
      if ((0x7FD < zExp) ||
          ((zExp == 0x7FD) &&
           (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&
           increment)) {
         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||
             ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||
             ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {
            return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);
         }
         return __packFloat64(zSign, 0x7FF, 0u, 0u);
      }
   }

   if (zExp < 0) {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
      zExp = 0;
      if (roundNearestEven) {
         increment = zFrac2 < 0u;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      zFrac1 &= ~((zFrac2 + uint(zFrac2 == 0u)) & uint(roundNearestEven));
   } else {
      zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);
   }
   return __packFloat64(zSign, zExp, zFrac0, zFrac1);
}

uint64_t
__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   if (zFrac2 >= 0x80000000u)
      increment = false;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {
            increment = false;
         }
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u)
         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
   }
   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
              (zSign != 0u && (zFrac0 | zFrac1) != 0u));
}

int64_t
__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   if (zFrac2 >= 0x80000000u)
      increment = false;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
            (zFrac2 != 0u));
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u)
         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
   }

   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      zSign != 0u);
   int64_t nan = mix(default_PosNaN, default_NegNaN, zSign != 0u);
   return mix(absZ, nan, ((zSign != 0u) != (absZ < 0)) && bool(absZ));
}

/* Returns the number of leading 0 bits before the most-significant 1 bit of
 * `a'. If `a' is zero, 32 is returned.
 */
int
__countLeadingZeros32(uint a)
{
   return 31 - findMSB(a);
}

/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and significand formed by the concatenation of `zFrac0' and `zFrac1', and
 * returns the proper double-precision floating-point value corresponding
 * to the abstract input. This routine is just like `__roundAndPackFloat64'
 * except that the input significand has fewer bits and does not have to be
 * normalized. In all cases, `zExp' must be 1 less than the "true" floating-
 * point exponent.
 */
uint64_t
__normalizeRoundAndPackFloat64(uint zSign,
                               int zExp,
                               uint zFrac0,
                               uint zFrac1)
{
   int shiftCount;
   uint zFrac2;

   if (zFrac0 == 0u) {
      zExp -= 32;
      zFrac0 = zFrac1;
      zFrac1 = 0u;
   }

   shiftCount = __countLeadingZeros32(zFrac0) - 11;
   if (0 <= shiftCount) {
      zFrac2 = 0u;
      __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);
   } else {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);
   }
   zExp -= shiftCount;
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}

/* Takes two double-precision floating-point values `a' and `b', one of which
 * is a NaN, and returns the appropriate NaN result.
 */
uint64_t
__propagateFloat64NaN(uint64_t __a, uint64_t __b)
{
#if defined RELAXED_NAN_PROPAGATION
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
#else
   bool aIsNaN = __is_nan(__a);
   bool bIsNaN = __is_nan(__b);
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);
   a.y |= 0x00080000u;
   b.y |= 0x00080000u;

   return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
#endif
}

/* If a shader is in the soft-fp64 path, it almost certainly has register
 * pressure problems. Choose a method to exchange two values that does not
 * require a temporary.
 */
#define EXCHANGE(a, b) \
   do { \
      a ^= b; \
      b ^= a; \
      a ^= b; \
   } while (false)
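
/* Note that the XOR swap is only valid when the two macro arguments are
 * distinct variables; EXCHANGE(x, x) would set `x' to zero. Every use below
 * swaps two different temporaries, so that case cannot arise.
 */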

/* Returns the result of adding the double-precision floating-point values
 * `a' and `b'. The operation is performed according to the IEEE Standard for
 * Floating-Point Arithmetic.
 */
uint64_t
__fadd64(uint64_t a, uint64_t b)
{
   uint aSign = __extractFloat64Sign(a);
   uint bSign = __extractFloat64Sign(b);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   int bExp = __extractFloat64Exp(b);
   int expDiff = aExp - bExp;
   if (aSign == bSign) {
      uint zFrac0;
      uint zFrac1;
      uint zFrac2;
      int zExp;

      if (expDiff == 0) {
         if (aExp == 0x7FF) {
            bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
            return mix(a, __propagateFloat64NaN(a, b), propagate);
         }
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         if (aExp == 0)
            return __packFloat64(aSign, 0, zFrac0, zFrac1);
         zFrac2 = 0u;
         zFrac0 |= 0x00200000u;
         zExp = aExp;
         __shift64ExtraRightJamming(
            zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      } else {
         if (expDiff < 0) {
            EXCHANGE(aFracHi, bFracHi);
            EXCHANGE(aFracLo, bFracLo);
            EXCHANGE(aExp, bExp);
         }

         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }

         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);
         __shift64ExtraRightJamming(
            bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);
         zExp = aExp;

         aFracHi |= 0x00100000u;
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         --zExp;
         if (!(zFrac0 < 0x00200000u)) {
            __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
            ++zExp;
         }
      }
      return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);

   } else {
      int zExp;

      __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
      __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
      if (expDiff != 0) {
         uint zFrac0;
         uint zFrac1;

         if (expDiff < 0) {
            EXCHANGE(aFracHi, bFracHi);
            EXCHANGE(aFracLo, bFracLo);
            EXCHANGE(aExp, bExp);
            aSign ^= 0x80000000u;
         }

         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }

         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
         __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
         aFracHi |= 0x40000000u;
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         zExp = aExp;
         --zExp;
         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
      }
      if (aExp == 0x7FF) {
         bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
         return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
      }
      bExp = mix(bExp, 1, aExp == 0);
      aExp = mix(aExp, 1, aExp == 0);

      uint zFrac0;
      uint zFrac1;
      uint sign_of_difference = 0;
      if (bFracHi < aFracHi) {
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
      }
      else if (aFracHi < bFracHi) {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         sign_of_difference = 0x80000000;
      }
      else if (bFracLo <= aFracLo) {
         /* It is possible that zFrac0 and zFrac1 may be zero after this. */
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
      }
      else {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         sign_of_difference = 0x80000000;
      }
      zExp = mix(bExp, aExp, sign_of_difference == 0u);
      aSign ^= sign_of_difference;
      uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
      uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
      return mix(retval_0, retval_1, zFrac0 != 0u || zFrac1 != 0u);
   }
}

/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' by the
 * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
 * product. The product is broken into four 32-bit pieces which are stored at
 * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 */
void
__mul64To128(uint a0, uint a1, uint b0, uint b1,
             out uint z0Ptr,
             out uint z1Ptr,
             out uint z2Ptr,
             out uint z3Ptr)
{
   uint z0 = 0u;
   uint z1 = 0u;
   uint z2 = 0u;
   uint z3 = 0u;
   uint more1 = 0u;
   uint more2 = 0u;

   umulExtended(a1, b1, z2, z3);
   umulExtended(a1, b0, z1, more2);
   __add64(z1, more2, 0u, z2, z1, z2);
   umulExtended(a0, b0, z0, more1);
   __add64(z0, more1, 0u, z1, z0, z1);
   umulExtended(a0, b1, more1, more2);
   __add64(more1, more2, 0u, z2, more1, z2);
   __add64(z0, z1, 0u, more1, z0, z1);
   z3Ptr = z3;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}
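
/* The sequence above is schoolbook multiplication on 32-bit digits:
 * (a0*2^32 + a1) * (b0*2^32 + b1) = a0*b0*2^64 + (a0*b1 + a1*b0)*2^32 + a1*b1,
 * with umulExtended() producing each 64-bit partial product and __add64()
 * accumulating the carries into the four result words z0 (most significant)
 * through z3 (least significant).
 */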

/* Normalizes the subnormal double-precision floating-point value represented
 * by the denormalized significand formed by the concatenation of `aFrac0' and
 * `aFrac1'. The normalized exponent is stored at the location pointed to by
 * `zExpPtr'. The most significant 21 bits of the normalized significand are
 * stored at the location pointed to by `zFrac0Ptr', and the least significant
 * 32 bits of the normalized significand are stored at the location pointed to
 * by `zFrac1Ptr'.
 */
void
__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,
                            out int zExpPtr,
                            out uint zFrac0Ptr,
                            out uint zFrac1Ptr)
{
   int shiftCount;
   uint temp_zfrac0, temp_zfrac1;
   shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;
   zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);

   temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);
   temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);

   __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);

   zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0);
   zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0);
}

/* Returns the result of multiplying the double-precision floating-point values
 * `a' and `b'. The operation is performed according to the IEEE Standard for
 * Floating-Point Arithmetic.
 */
uint64_t
__fmul64(uint64_t a, uint64_t b)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint zFrac3 = 0u;
   int zExp;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int bExp = __extractFloat64Exp(b);
   uint bSign = __extractFloat64Sign(b);
   uint zSign = aSign ^ bSign;
   if (aExp == 0x7FF) {
      if (((aFracHi | aFracLo) != 0u) ||
          ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {
         return __propagateFloat64NaN(a, b);
      }
      if ((uint(bExp) | bFracHi | bFracLo) == 0u)
         return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   if (bExp == 0x7FF) {
      /* a cannot be NaN, but is b NaN? */
      if ((bFracHi | bFracLo) != 0u)
#if defined RELAXED_NAN_PROPAGATION
         return b;
#else
         return __propagateFloat64NaN(a, b);
#endif
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   if (aExp == 0) {
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   if (bExp == 0) {
      if ((bFracHi | bFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);
   }
   zExp = aExp + bExp - 0x400;
   aFracHi |= 0x00100000u;
   __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);
   __mul64To128(
      aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);
   __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);
   zFrac2 |= uint(zFrac3 != 0u);
   if (0x00200000u <= zFrac0) {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      ++zExp;
   }
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}

uint64_t
__ffma64(uint64_t a, uint64_t b, uint64_t c)
{
   return __fadd64(__fmul64(a, b), c);
}

/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
 * number of bits given in `count'. Any bits shifted off are lost. The value
 * of `count' can be arbitrarily large; in particular, if `count' is greater
 * than 64, the result will be 0. The result is broken into two 32-bit pieces
 * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shift64Right(uint a0, uint a1,
               int count,
               out uint z0Ptr,
               out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   z0 = 0u;
   z0 = mix(z0, (a0 >> count), count < 32);
   z0 = mix(z0, a0, count == 0);

   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);
   z1 = mix(z1, a0, count == 0);

   z1Ptr = z1;
   z0Ptr = z0;
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the unsigned integer format. The conversion is performed according
 * to the IEEE Standard for Floating-Point Arithmetic.
 */
uint
__fp64_to_uint(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))
      return 0xFFFFFFFFu;

   aFracHi |= mix(0u, 0x00100000u, aExp != 0);

   int shiftDist = 0x427 - aExp;
   if (0 < shiftDist)
      __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);

   if ((aFracHi & 0xFFFFF000u) != 0u)
      return mix(~0u, 0u, aSign != 0u);

   uint z = 0u;
   uint zero = 0u;
   __shift64Right(aFracHi, aFracLo, 12, zero, z);

   uint expt = mix(~0u, 0u, aSign != 0u);

   return mix(z, expt, (aSign != 0u) && (z != 0u));
}

uint64_t
__uint_to_fp64(uint a)
{
   if (a == 0u)
      return 0ul;

   int shiftDist = __countLeadingZeros32(a) + 21;

   uint aHigh = 0u;
   uint aLow = 0u;
   int negCount = (-shiftDist) & 31;

   aHigh = mix(0u, a << shiftDist - 32, shiftDist < 64);
   aLow = 0u;
   aHigh = mix(aHigh, 0u, shiftDist == 0);
   aLow = mix(aLow, a, shiftDist == 0);
   aHigh = mix(aHigh, a >> negCount, shiftDist < 32);
   aLow = mix(aLow, a << shiftDist, shiftDist < 32);

   return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
}

uint64_t
__uint64_to_fp64(uint64_t a)
{
   if (a == 0u)
      return 0ul;

   uvec2 aFrac = unpackUint2x32(a);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);

   if ((aFracHi & 0x80000000u) != 0u) {
      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);
      return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);
   } else {
      return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);
   }
}

uint64_t
__fp64_to_uint64(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   uint zFrac2 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   int shiftCount = 0x433 - aExp;

   if (shiftCount <= 0) {
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return __propagateFloat64NaN(a, a);
         return mix(default_nan, a, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }
   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
}
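
/* In the 64-bit conversions above and below, 0x433 is the biased exponent
 * (1023 + 52) at which the 53-bit significand, read as an integer, equals the
 * represented value, so smaller exponents shift the significand right (with
 * jamming so rounding can still see discarded bits) and larger exponents
 * shift it left.
 */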

int64_t
__fp64_to_int64(uint64_t a)
{
   uint zFrac2 = 0u;
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   int shiftCount = 0x433 - aExp;

   if (shiftCount <= 0) {
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return default_NegNaN;
         return mix(default_NegNaN, default_PosNaN, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }

   return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);
}

uint64_t
__int64_to_fp64(int64_t a)
{
   if (a == 0)
      return 0ul;

   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
   uint aFracHi = __extractFloat64FracHi(absA);
   uvec2 aFrac = unpackUint2x32(absA);
   uint zSign = uint(unpackInt2x32(a).y) & 0x80000000u;

   if ((aFracHi & 0x80000000u) != 0u) {
      return mix(0ul, __packFloat64(0x80000000u, 0x434, 0u, 0u), a < 0);
   }

   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the 32-bit two's complement integer format. The conversion is
 * performed according to the IEEE Standard for Floating-Point Arithmetic---
 * which means in particular that the conversion is rounded according to the
 * current rounding mode. If `a' is a NaN, the largest positive integer is
 * returned. Otherwise, if the conversion overflows, the largest integer with
 * the same sign as `a' is returned.
 */
int
__fp64_to_int(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   uint absZ = 0u;
   uint aFracExtra = 0u;
   int shiftCount = aExp - 0x413;

   if (0 <= shiftCount) {
      if (0x41E < aExp) {
         if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))
            aSign = 0u;
         return mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
      }
      __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);
   } else {
      if (aExp < 0x3FF)
         return 0;

      aFracHi |= 0x00100000u;
      aFracExtra = (aFracHi << (shiftCount & 31)) | aFracLo;
      absZ = aFracHi >> (-shiftCount);
   }

   int z = mix(int(absZ), -int(absZ), aSign != 0u);
   int nan = mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
   return mix(z, nan, ((aSign != 0u) != (z < 0)) && bool(z));
}

/* Returns the result of converting the 32-bit two's complement integer `a'
 * to the double-precision floating-point format. The conversion is performed
 * according to the IEEE Standard for Floating-Point Arithmetic.
 */
uint64_t
__int_to_fp64(int a)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   if (a == 0)
      return __packFloat64(0u, 0, 0u, 0u);
   uint zSign = uint(a) & 0x80000000u;
   uint absA = mix(uint(a), uint(-a), a < 0);
   int shiftCount = __countLeadingZeros32(absA) - 11;
   if (0 <= shiftCount) {
      zFrac0 = absA << shiftCount;
      zFrac1 = 0u;
   } else {
      __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1);
   }
   return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);
}

bool
__fp64_to_bool(uint64_t a)
{
   return !__feq64_nonnan(__fabs64(a), 0ul);
}

uint64_t
__bool_to_fp64(bool a)
{
   return packUint2x32(uvec2(0x00000000u, uint(-int(a) & 0x3ff00000)));
}
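
/* __bool_to_fp64 relies on -int(true) being all ones: ANDing with 0x3ff00000
 * then yields the high word of 1.0 (0x3FF00000), while false gives 0, so the
 * results are exactly 1.0 and +0.0.
 */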

/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
 * single-precision floating-point value, returning the result. After being
 * shifted into the proper positions, the three fields are simply added
 * together to form the result. This means that any integer portion of `zFrac'
 * will be added into the exponent. Since a properly normalized significand
 * will have an integer portion equal to 1, the `zExp' input should be 1 less
 * than the desired result exponent whenever `zFrac' is a complete, normalized
 * significand.
 */
float
__packFloat32(uint zSign, int zExp, uint zFrac)
{
   return uintBitsToFloat(zSign + (uint(zExp)<<23) + zFrac);
}

/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and significand `zFrac', and returns the proper single-precision floating-
 * point value corresponding to the abstract input. Ordinarily, the abstract
 * value is simply rounded and packed into the single-precision format, with
 * the inexact exception raised if the abstract input cannot be represented
 * exactly. However, if the abstract value is too large, the overflow and
 * inexact exceptions are raised and an infinity or maximal finite value is
 * returned. If the abstract value is too small, the input value is rounded to
 * a subnormal number, and the underflow and inexact exceptions are raised if
 * the abstract input cannot be represented exactly as a subnormal single-
 * precision floating-point number.
 * The input significand `zFrac' has its binary point between bits 30
 * and 29, which is 7 bits to the left of the usual location. This shifted
 * significand must be normalized or smaller. If `zFrac' is not normalized,
 * `zExp' must be 0; in that case, the result returned is a subnormal number,
 * and it must not require rounding. In the usual case that `zFrac' is
 * normalized, `zExp' must be 1 less than the "true" floating-point exponent.
 * The handling of underflow and overflow follows the IEEE Standard for
 * Floating-Point Arithmetic.
 */
float
__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)
{
   bool roundNearestEven;
   int roundIncrement;
   int roundBits;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   roundIncrement = 0x40;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         roundIncrement = 0;
      } else {
         roundIncrement = 0x7F;
         if (zSign != 0u) {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)
               roundIncrement = 0;
         } else {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)
               roundIncrement = 0;
         }
      }
   }
   roundBits = int(zFrac & 0x7Fu);
   if (0xFDu <= uint(zExp)) {
      if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))
         return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);
      int count = -zExp;
      bool zexp_lt0 = zExp < 0;
      uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);
      zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);
      roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);
      zExp = mix(zExp, 0, zexp_lt0);
   }
   zFrac = (zFrac + uint(roundIncrement))>>7;
   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);

   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the single-precision floating-point format. The conversion is
 * performed according to the IEEE Standard for Floating-Point Arithmetic.
 */
float
__fp64_to_fp32(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   uint zFrac = 0u;
   uint allZero = 0u;

   uint aFracLo = __extractFloat64FracLo(__a);
   uint aFracHi = __extractFloat64FracHi(__a);
   int aExp = __extractFloat64Exp(__a);
   uint aSign = __extractFloat64Sign(__a);
   if (aExp == 0x7FF) {
      __shortShift64Left(a.y, a.x, 12, a.y, a.x);
      float rval = uintBitsToFloat(aSign | 0x7FC00000u | (a.y>>9));
      rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);
      return rval;
   }
   __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);
   zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);
   return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
}

/* Returns the result of converting the single-precision floating-point value
 * `a' to the double-precision floating-point format.
 */
uint64_t
__fp32_to_fp64(float f)
{
   uint a = floatBitsToUint(f);
   uint aFrac = a & 0x007FFFFFu;
   int aExp = int((a>>23) & 0xFFu);
   uint aSign = a & 0x80000000u;
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;

   if (aExp == 0xFF) {
      if (aFrac != 0u) {
         uint nanLo = 0u;
         uint nanHi = a<<9;
         __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);
         nanHi |= aSign | 0x7FF80000u;
         return packUint2x32(uvec2(nanLo, nanHi));
      }
      return __packFloat64(aSign, 0x7FF, 0u, 0u);
   }

   if (aExp == 0) {
      if (aFrac == 0u)
         return __packFloat64(aSign, 0, 0u, 0u);
      /* Normalize subnormal */
      int shiftCount = __countLeadingZeros32(aFrac) - 8;
      aFrac <<= shiftCount;
      aExp = 1 - shiftCount;
      --aExp;
   }

   __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);
   return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);
}
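
/* The bias adjustments in the two conversions above come from the difference
 * between the double and single precision exponent biases: 1023 - 127 = 896 =
 * 0x380. __fp64_to_fp32 uses 0x381 because __roundAndPackFloat32 expects an
 * exponent that is one less than the true one.
 */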

/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
 * 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
 * modulo 2^96, so any carry out is lost. The result is broken into three
 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
 * `z1Ptr', and `z2Ptr'.
 */
void
__add96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   uint z2 = a2 + b2;
   uint carry1 = uint(z2 < a2);
   uint z1 = a1 + b1;
   uint carry0 = uint(z1 < a1);
   uint z0 = a0 + b0;
   z1 += carry1;
   z0 += uint(z1 < carry1);
   z0 += carry0;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
 * the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
 * is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
 * into three 32-bit pieces which are stored at the locations pointed to by
 * `z0Ptr', `z1Ptr', and `z2Ptr'.
 */
void
__sub96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   uint z2 = a2 - b2;
   uint borrow1 = uint(a2 < b2);
   uint z1 = a1 - b1;
   uint borrow0 = uint(a1 < b1);
   uint z0 = a0 - b0;
   z0 -= uint(z1 < borrow1);
   z1 -= borrow1;
   z0 -= borrow0;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Returns an approximation to the 32-bit integer quotient obtained by dividing
 * `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
 * divisor `b' must be at least 2^31. If q is the exact quotient truncated
 * toward zero, the approximation returned lies between q and q + 2 inclusive.
 * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
 * unsigned integer is returned.
 */
uint
__estimateDiv64To32(uint a0, uint a1, uint b)
{
   uint b0;
   uint b1;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint z;

   if (b <= a0)
      return 0xFFFFFFFFu;
   b0 = b>>16;
   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
   umulExtended(b, z, term0, term1);
   __sub64(a0, a1, term0, term1, rem0, rem1);
   while (int(rem0) < 0) {
      z -= 0x10000u;
      b1 = b<<16;
      __add64(rem0, rem1, b0, b1, rem0, rem1);
   }
   rem0 = (rem0<<16) | (rem1>>16);
   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;
   return z;
}

uint
__sqrtOddAdjustments(int index)
{
   uint res = 0u;
   if (index == 0)
      res = 0x0004u;
   if (index == 1)
      res = 0x0022u;
   if (index == 2)
      res = 0x005Du;
   if (index == 3)
      res = 0x00B1u;
   if (index == 4)
      res = 0x011Du;
   if (index == 5)
      res = 0x019Fu;
   if (index == 6)
      res = 0x0236u;
   if (index == 7)
      res = 0x02E0u;
   if (index == 8)
      res = 0x039Cu;
   if (index == 9)
      res = 0x0468u;
   if (index == 10)
      res = 0x0545u;
   if (index == 11)
      res = 0x631u;
   if (index == 12)
      res = 0x072Bu;
   if (index == 13)
      res = 0x0832u;
   if (index == 14)
      res = 0x0946u;
   if (index == 15)
      res = 0x0A67u;

   return res;
}

uint
__sqrtEvenAdjustments(int index)
{
   uint res = 0u;
   if (index == 0)
      res = 0x0A2Du;
   if (index == 1)
      res = 0x08AFu;
   if (index == 2)
      res = 0x075Au;
   if (index == 3)
      res = 0x0629u;
   if (index == 4)
      res = 0x051Au;
   if (index == 5)
      res = 0x0429u;
   if (index == 6)
      res = 0x0356u;
   if (index == 7)
      res = 0x029Eu;
   if (index == 8)
      res = 0x0200u;
   if (index == 9)
      res = 0x0179u;
   if (index == 10)
      res = 0x0109u;
   if (index == 11)
      res = 0x00AFu;
   if (index == 12)
      res = 0x0068u;
   if (index == 13)
      res = 0x0034u;
   if (index == 14)
      res = 0x0012u;
   if (index == 15)
      res = 0x0002u;

   return res;
}

/* Returns an approximation to the square root of the 32-bit significand given
 * by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
 * `aExp' (the least significant bit) is 1, the integer returned approximates
 * 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
 * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
 * case, the approximation returned lies strictly within +/-2 of the exact
 * value.
 */
uint
__estimateSqrt32(int aExp, uint a)
{
   uint z;

   int index = int(a>>27 & 15u);
   if ((aExp & 1) != 0) {
      z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);
      z = ((a / z)<<14) + (z<<15);
      a >>= 1;
   } else {
      z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);
      z = a / z + z;
      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);
      if (z <= a)
         return uint(int(a)>>1);
   }
   return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);
}

/* Returns the square root of the double-precision floating-point value `a'.
 * The operation is performed according to the IEEE Standard for Floating-Point
 * Arithmetic.
 */
uint64_t
__fsqrt64(uint64_t a)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint doubleZFrac0 = 0u;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint rem2 = 0u;
   uint rem3 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint term2 = 0u;
   uint term3 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   if (aExp == 0x7FF) {
      if ((aFracHi | aFracLo) != 0u)
         return __propagateFloat64NaN(a, a);
      if (aSign == 0u)
         return a;
      return default_nan;
   }
   if (aSign != 0u) {
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return a;
      return default_nan;
   }
   if (aExp == 0) {
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(0u, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;
   aFracHi |= 0x00100000u;
   __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);
   zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;
   if (zFrac0 == 0u)
      zFrac0 = 0x7FFFFFFFu;
   doubleZFrac0 = zFrac0 + zFrac0;
   __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
   umulExtended(zFrac0, zFrac0, term0, term1);
   __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
   while (int(rem0) < 0) {
      --zFrac0;
      doubleZFrac0 -= 2u;
      __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);
   }
   zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);
   if ((zFrac1 & 0x1FFu) <= 5u) {
      if (zFrac1 == 0u)
         zFrac1 = 1u;
      umulExtended(doubleZFrac0, zFrac1, term1, term2);
      __sub64(rem1, 0u, term1, term2, rem1, rem2);
      umulExtended(zFrac1, zFrac1, term2, term3);
      __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
      while (int(rem1) < 0) {
         --zFrac1;
         __shortShift64Left(0u, zFrac1, 1, term2, term3);
         term3 |= 1u;
         term2 |= doubleZFrac0;
         __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);
      }
      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);
   }
   __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);
   return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);
}

uint64_t
__ftrunc64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int aExp = __extractFloat64Exp(__a);
   uint zLo;
   uint zHi;

   int unbiasedExp = aExp - 1023;
   int fracBits = 52 - unbiasedExp;
   uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);
   uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);
   zLo = maskLo & a.x;
   zHi = maskHi & a.y;

   zLo = mix(zLo, 0u, unbiasedExp < 0);
   zHi = mix(zHi, 0u, unbiasedExp < 0);
   zLo = mix(zLo, a.x, unbiasedExp > 52);
   zHi = mix(zHi, a.y, unbiasedExp > 52);
   return packUint2x32(uvec2(zLo, zHi));
}
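
/* As a check of the masking above: for 2.5 (0x4004000000000000) the unbiased
 * exponent is 1, so fracBits is 51, maskLo is 0u, and maskHi keeps only bits
 * 19..31 of the high word, clearing the lone fraction bit and leaving
 * 0x4000000000000000, i.e. 2.0.
 */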

uint64_t
__ffloor64(uint64_t a)
{
   /* The big assumption is that when 'a' is NaN, __ftrunc(a) returns a. Based
    * on that assumption, NaN values that don't have the sign bit will safely
    * return NaN (identity). This is guarded by RELAXED_NAN_PROPAGATION
    * because otherwise the NaN should have the "signal" bit set. The
    * __fadd64 will ensure that occurs.
    */
   bool is_positive =
#if defined RELAXED_NAN_PROPAGATION
      int(unpackUint2x32(a).y) >= 0
#else
      __fge64(a, 0ul)
#endif
      ;
   uint64_t tr = __ftrunc64(a);

   if (is_positive || __feq64(tr, a)) {
      return tr;
   } else {
      return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */);
   }
}

uint64_t
__fround64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int unbiasedExp = __extractFloat64Exp(__a) - 1023;
   uint aHi = a.y;
   uint aLo = a.x;

   if (unbiasedExp < 20) {
      if (unbiasedExp < 0) {
         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {
            return 0ul;
         }
         aHi &= 0x80000000u;
         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {
            aLo = 0u;
            return packUint2x32(uvec2(aLo, aHi));
         }
         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);
         aLo = 0u;
      } else {
         uint maskExp = 0x000FFFFFu >> unbiasedExp;
         uint lastBit = maskExp + 1;
         aHi += 0x00080000u >> unbiasedExp;
         if ((aHi & maskExp) == 0u)
            aHi &= ~lastBit;
         aHi &= ~maskExp;
         aLo = 0u;
      }
   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {
      return __a;
   } else {
      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);
      if ((aLo & maskExp) == 0u)
         return __a;
      uint tmp = aLo + (1u << (51 - unbiasedExp));
      if (tmp < aLo)
         aHi += 1u;
      aLo = tmp;
      aLo &= ~maskExp;
   }

   return packUint2x32(uvec2(aLo, aHi));
}

uint64_t
__fmin64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool b_nan = __is_nan(b);
   bool a_lt_b = __flt64_nonnan(a, b);
   bool a_nan = __is_nan(a);

   return (b_nan || a_lt_b) && !a_nan ? a : b;
}

uint64_t
__fmax64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool b_nan = __is_nan(b);
   bool a_lt_b = __flt64_nonnan(a, b);
   bool a_nan = __is_nan(a);

   return (b_nan || a_lt_b) && !a_nan ? b : a;
}

uint64_t
__ffract64(uint64_t a)
{
   return __fadd64(a, __fneg64(__ffloor64(a)));
}