// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_UTILS_H
#define VIXL_UTILS_H

#include <cmath>
#include <cstring>
#include <limits>
#include <optional>
#include <type_traits>
#include <vector>

#include "compiler-intrinsics-vixl.h"
#include "globals-vixl.h"

#if defined(VIXL_USE_PANDA_ALLOC) && !defined(PANDA_BUILD)
#error "PANDA_BUILD should be defined for VIXL_USE_PANDA_ALLOC"
#endif

#ifdef VIXL_USE_PANDA_ALLOC
#include "mem/arena_allocator_stl_adapter.h"
#include "mem/arena_allocator.h"
#include "utils/arena_containers.h"
#else
#include <list>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#endif

#if defined(PANDA_BUILD) && !defined(VIXL_USE_PANDA_ALLOC)
namespace ark {
template <bool> class ArenaAllocatorT;
using ArenaAllocator = ArenaAllocatorT<false>;
}  // namespace ark
#endif

namespace vixl {
#ifdef VIXL_USE_PANDA_ALLOC
template <typename T>
using List = ark::ArenaList<T>;

template <typename K, typename V>
using Map = ark::ArenaMap<K, V>;

template <typename K, typename V>
using UnorderedMap = ark::ArenaUnorderedMap<K, V>;

using String = ark::ArenaString;

template <typename T>
using Vector = ark::ArenaVector<T>;
#else
template <typename T>
using List = std::list<T>;

template <typename K, typename V>
using Map = std::map<K, V>;

template <typename K, typename V>
using UnorderedMap = std::unordered_map<K, V>;

using String = std::string;

template <typename T>
using Vector = std::vector<T>;
#endif

#ifdef PANDA_BUILD
using PandaAllocator = ark::ArenaAllocator;
#endif  // PANDA_BUILD

template <typename T>
struct is_unbounded_array : public std::false_type {};

template <typename T>
struct is_unbounded_array<T[]> : public std::true_type {};

template <typename T>
constexpr bool is_unbounded_array_v = is_unbounded_array<T>::value;
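
// A minimal sketch (not part of the original header) of what the trait above
// reports for a few types:
//
//   static_assert(is_unbounded_array_v<int[]>);
//   static_assert(!is_unbounded_array_v<int[4]>);
//   static_assert(!is_unbounded_array_v<int>);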

class AllocatorWrapper {
 public:
#ifndef PANDA_BUILD
  AllocatorWrapper() = default;
#else  // PANDA_BUILD
  AllocatorWrapper([[maybe_unused]] PandaAllocator* allocator)
#ifdef VIXL_USE_PANDA_ALLOC
      : allocator_(allocator)
#endif
  {}
#endif  // PANDA_BUILD

  auto Adapter() {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->Adapter();
#else
    return std::allocator<void>();
#endif
  }

  template <typename T, typename... Args>
  [[nodiscard]] std::enable_if_t<!std::is_array_v<T>, T*> New(Args&&... args) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->template New<T>(std::forward<Args>(args)...);
#else
    return new T(std::forward<Args>(args)...);
#endif
  }

  template <typename T>
  [[nodiscard]] std::enable_if_t<is_unbounded_array_v<T>, std::remove_extent_t<T>*> New(size_t size) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->template New<T>(size);
#else
    return new std::remove_extent_t<T>[size];
#endif
  }

  [[nodiscard]] void* Alloc(size_t size) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->Alloc(size);
#else
    return malloc(size);
#endif
  }

  template <typename T>
  void DeleteObject([[maybe_unused]] T* obj) {
#ifndef VIXL_USE_PANDA_ALLOC
    delete obj;
#endif
  }

  template <typename T>
  void DeleteArray([[maybe_unused]] T* arr) {
#ifndef VIXL_USE_PANDA_ALLOC
    delete[] arr;
#endif
  }

  void Free([[maybe_unused]] void* ptr) {
#ifndef VIXL_USE_PANDA_ALLOC
    free(ptr);
#endif
  }

 private:
#ifdef VIXL_USE_PANDA_ALLOC
  PandaAllocator* allocator_;
#endif
};

// Macros for compile-time format checking.
#if GCC_VERSION_OR_NEWER(4, 4, 0)
#define PRINTF_CHECK(format_index, varargs_index) \
  __attribute__((format(gnu_printf, format_index, varargs_index)))
#else
#define PRINTF_CHECK(format_index, varargs_index)
#endif

#ifdef __GNUC__
#define VIXL_HAS_DEPRECATED_WITH_MSG
#elif defined(__clang__)
#ifdef __has_extension(attribute_deprecated_with_message)
#define VIXL_HAS_DEPRECATED_WITH_MSG
#endif
#endif

#ifdef VIXL_HAS_DEPRECATED_WITH_MSG
#define VIXL_DEPRECATED(replaced_by, declarator) \
  __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
#else
#define VIXL_DEPRECATED(replaced_by, declarator) declarator
#endif

#ifdef VIXL_DEBUG
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
#else
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
#endif

template <typename T, size_t n>
constexpr size_t ArrayLength(const T (&)[n]) {
  return n;
}

inline uint64_t GetUintMask(unsigned bits) {
  VIXL_ASSERT(bits <= 64);
  uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits);
  return base - 1;
}

inline uint64_t GetSignMask(unsigned bits) {
  VIXL_ASSERT(bits <= 64);
  return UINT64_C(1) << (bits - 1);
}
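
// For orientation, a few hand-evaluated values of the mask helpers above
// (illustrative; not taken from the library's tests):
//
//   GetUintMask(8)  == 0xff
//   GetUintMask(64) == UINT64_MAX
//   GetSignMask(8)  == 0x80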

// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
}
inline bool IsIntN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  if (n == 32) return true;
  int32_t limit = INT32_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
inline bool IsIntN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
}
inline bool IsIntN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  if (n == 64) return true;
  int64_t limit = INT64_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
  return IsIntN(n, x);
}

inline bool IsUintN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  if (n >= 32) return true;
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint32_t>(x) >> n);
}
inline bool IsUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  if (n >= 64) return true;
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint64_t>(x) >> n);
}
VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
  return IsUintN(n, x);
}

inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
}
VIXL_DEPRECATED("TruncateToUintN",
                inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
  return TruncateToUintN(n, x);
}

// clang-format off
#define INT_1_TO_32_LIST(V)                     \
V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)  \
V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)

#define INT_33_TO_63_LIST(V)                    \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)

#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)

// clang-format on

#define DECLARE_IS_INT_N(N)                                       \
  inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }        \
  VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
    return IsIntN(N, x);                                          \
  }

#define DECLARE_IS_UINT_N(N)                                        \
  inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }        \
  VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
    return IsUintN(N, x);                                           \
  }

#define DECLARE_TRUNCATE_TO_UINT_32(N)                             \
  inline uint32_t TruncateToUint##N(uint64_t x) {                  \
    return static_cast<uint32_t>(TruncateToUintN(N, x));           \
  }                                                                \
  VIXL_DEPRECATED("TruncateToUint" #N,                             \
                  inline uint32_t truncate_to_int##N(int64_t x)) { \
    return TruncateToUint##N(x);                                   \
  }

INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)

#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N
#undef DECLARE_TRUNCATE_TO_UINT_32
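
// A few hand-worked data points for the generated helpers (illustrative only):
//
//   IsInt8(127)  == true     IsInt8(128)  == false
//   IsUint8(255) == true     IsUint8(256) == false
//   TruncateToUint16(0x12345678) == 0x5678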

// Bit field extraction.
inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  if ((msb == 63) && (lsb == 0)) return x;
  return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
}


inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
}


inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
  // If the highest extracted bit is set, sign extend.
  if ((temp >> (msb - lsb)) == 1) {
    temp |= ~UINT64_C(0) << (msb - lsb);
  }
  int64_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}

inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
  int32_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}

inline uint64_t RotateRight(uint64_t value,
                            unsigned int rotate,
                            unsigned int width) {
  VIXL_ASSERT((width > 0) && (width <= 64));
  uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
  rotate &= 63;
  if (rotate > 0) {
    value &= width_mask;
    value = (value << (width - rotate)) | (value >> rotate);
  }
  return value & width_mask;
}


// Wrapper class for passing FP16 values through the assembler.
// This is purely to aid with type checking/casting.
class Float16 {
 public:
  explicit Float16(double dvalue);
  Float16() : rawbits_(0x0) {}
  friend uint16_t Float16ToRawbits(Float16 value);
  friend Float16 RawbitsToFloat16(uint16_t bits);

 protected:
  uint16_t rawbits_;
};

// Floating point representation.
uint16_t Float16ToRawbits(Float16 value);


uint32_t FloatToRawbits(float value);
VIXL_DEPRECATED("FloatToRawbits",
                inline uint32_t float_to_rawbits(float value)) {
  return FloatToRawbits(value);
}

uint64_t DoubleToRawbits(double value);
VIXL_DEPRECATED("DoubleToRawbits",
                inline uint64_t double_to_rawbits(double value)) {
  return DoubleToRawbits(value);
}

Float16 RawbitsToFloat16(uint16_t bits);

float RawbitsToFloat(uint32_t bits);
VIXL_DEPRECATED("RawbitsToFloat",
                inline float rawbits_to_float(uint32_t bits)) {
  return RawbitsToFloat(bits);
}

double RawbitsToDouble(uint64_t bits);
VIXL_DEPRECATED("RawbitsToDouble",
                inline double rawbits_to_double(uint64_t bits)) {
  return RawbitsToDouble(bits);
}

// Convert unsigned to signed numbers in a well-defined way (using two's
// complement representations).
inline int64_t RawbitsToInt64(uint64_t bits) {
  return (bits >= UINT64_C(0x8000000000000000))
             ? (-static_cast<int64_t>(-bits - 1) - 1)
             : static_cast<int64_t>(bits);
}

inline int32_t RawbitsToInt32(uint32_t bits) {
  return (bits >= UINT64_C(0x80000000)) ? (-static_cast<int32_t>(-bits - 1) - 1)
                                        : static_cast<int32_t>(bits);
}
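
// Hand-worked examples for the helpers above (illustrative, not exhaustive):
//
//   ExtractUnsignedBitfield64(7, 4, 0xabcd) == 0xc
//   ExtractSignedBitfield64(3, 0, 0xf)      == -1
//   RotateRight(0x1, 4, 8)                  == 0x10
//   RawbitsToInt32(0xffffffff)              == -1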

namespace internal {

// Internal simulation class used solely by the simulator to
// provide an abstraction layer for any half-precision arithmetic.
class SimFloat16 : public Float16 {
 public:
  // TODO: We should investigate making this constructor explicit.
  // This is currently difficult to do due to a number of templated
  // functions in the simulator which rely on returning double values.
  SimFloat16(double dvalue) : Float16(dvalue) {}  // NOLINT(runtime/explicit)
  SimFloat16(Float16 f) {  // NOLINT(runtime/explicit)
    this->rawbits_ = Float16ToRawbits(f);
  }
  SimFloat16() : Float16() {}
  SimFloat16 operator-() const;
  SimFloat16 operator+(SimFloat16 rhs) const;
  SimFloat16 operator-(SimFloat16 rhs) const;
  SimFloat16 operator*(SimFloat16 rhs) const;
  SimFloat16 operator/(SimFloat16 rhs) const;
  bool operator<(SimFloat16 rhs) const;
  bool operator>(SimFloat16 rhs) const;
  bool operator==(SimFloat16 rhs) const;
  bool operator!=(SimFloat16 rhs) const;
  // This is necessary for conversions performed in (macro asm) Fmov.
  bool operator==(double rhs) const;
  operator double() const;
};
}  // namespace internal

uint32_t Float16Sign(internal::SimFloat16 value);

uint32_t Float16Exp(internal::SimFloat16 value);

uint32_t Float16Mantissa(internal::SimFloat16 value);

uint32_t FloatSign(float value);
VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
  return FloatSign(value);
}

uint32_t FloatExp(float value);
VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
  return FloatExp(value);
}

uint32_t FloatMantissa(float value);
VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
  return FloatMantissa(value);
}

uint32_t DoubleSign(double value);
VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
  return DoubleSign(value);
}

uint32_t DoubleExp(double value);
VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
  return DoubleExp(value);
}

uint64_t DoubleMantissa(double value);
VIXL_DEPRECATED("DoubleMantissa",
                inline uint64_t double_mantissa(double value)) {
  return DoubleMantissa(value);
}

internal::SimFloat16 Float16Pack(uint16_t sign,
                                 uint16_t exp,
                                 uint16_t mantissa);

float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
VIXL_DEPRECATED("FloatPack",
                inline float float_pack(uint32_t sign,
                                        uint32_t exp,
                                        uint32_t mantissa)) {
  return FloatPack(sign, exp, mantissa);
}

double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
VIXL_DEPRECATED("DoublePack",
                inline double double_pack(uint32_t sign,
                                          uint32_t exp,
                                          uint64_t mantissa)) {
  return DoublePack(sign, exp, mantissa);
}
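
// The definitions live in the .cc file. Assuming they extract/assemble the raw
// IEEE-754 fields (which the names and the kFloat*Bits constants further down
// suggest), typical values would look like:
//
//   DoubleSign(-1.0)     == 1
//   DoubleExp(1.0)       == 1023        // biased exponent field
//   FloatMantissa(1.5f)  == 0x400000
//   FloatPack(0, 127, 0) == 1.0f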

// An fpclassify() function for 16-bit half-precision floats.
int Float16Classify(Float16 value);
VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
  return Float16Classify(RawbitsToFloat16(value));
}

bool IsZero(Float16 value);

inline bool IsPositiveZero(double value) {
  return (value == 0.0) && (copysign(1.0, value) > 0.0);
}

inline bool IsNaN(float value) { return std::isnan(value); }

inline bool IsNaN(double value) { return std::isnan(value); }

inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }

inline bool IsInf(float value) { return std::isinf(value); }

inline bool IsInf(double value) { return std::isinf(value); }

inline bool IsInf(Float16 value) {
  return Float16Classify(value) == FP_INFINITE;
}


// NaN tests.
inline bool IsSignallingNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  uint64_t raw = DoubleToRawbits(num);
  if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw = FloatToRawbits(num);
  if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(Float16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
}


template <typename T>
inline bool IsQuietNaN(T num) {
  return IsNaN(num) && !IsSignallingNaN(num);
}


// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
}


inline float ToQuietNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
}


inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  VIXL_ASSERT(IsNaN(num));
  return internal::SimFloat16(
      RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
}
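
// Quick reference for the NaN predicates above, using standard IEEE-754 single
// precision bit patterns (illustrative values, not library constants):
//
//   IsSignallingNaN(RawbitsToFloat(0x7f800001))  // true: quiet bit clear
//   IsQuietNaN(RawbitsToFloat(0x7fc00000))       // true: quiet bit set
//   ToQuietNaN(RawbitsToFloat(0x7f800001))       // rawbits become 0x7fc00001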

// Fused multiply-add.
inline double FusedMultiplyAdd(double op1, double op2, double a) {
  return fma(op1, op2, a);
}


inline float FusedMultiplyAdd(float op1, float op2, float a) {
  return fmaf(op1, op2, a);
}


inline uint64_t LowestSetBit(uint64_t value) { return value & -value; }


template <typename T>
inline int HighestSetBitPosition(T value) {
  VIXL_ASSERT(value != 0);
  return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
}


template <typename V>
inline int WhichPowerOf2(V value) {
  VIXL_ASSERT(IsPowerOf2(value));
  return CountTrailingZeros(value);
}


unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);


int BitCount(uint64_t value);


template <typename T>
T ReverseBits(T value) {
  VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
              (sizeof(value) == 4) || (sizeof(value) == 8));
  T result = 0;
  for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
    result = (result << 1) | (value & 1);
    value >>= 1;
  }
  return result;
}


template <typename T>
inline T SignExtend(T val, int size_in_bits) {
  VIXL_ASSERT(size_in_bits > 0);
  T mask = (T(2) << (size_in_bits - 1)) - T(1);
  val &= mask;
  T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
  val |= sign_bits;
  return val;
}


template <typename T>
T ReverseBytes(T value, int block_bytes_log2) {
  VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
  VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
  // Split the 64-bit value into an 8-bit array, where b[0] is the least
  // significant byte, and b[7] is the most significant.
  uint8_t bytes[8];
  uint64_t mask = UINT64_C(0xff00000000000000);
  for (int i = 7; i >= 0; i--) {
    bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
    mask >>= 8;
  }

  // Permutation tables for REV instructions.
  //  permute_table[0] is used by REV16_x, REV16_w
  //  permute_table[1] is used by REV32_x, REV_w
  //  permute_table[2] is used by REV_x
  VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
  static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
                                              {4, 5, 6, 7, 0, 1, 2, 3},
                                              {0, 1, 2, 3, 4, 5, 6, 7}};
  uint64_t temp = 0;
  for (int i = 0; i < 8; i++) {
    temp <<= 8;
    temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
  }

  T result;
  VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
  memcpy(&result, &temp, sizeof(result));
  return result;
}

template <unsigned MULTIPLE, typename T>
inline bool IsMultiple(T value) {
  VIXL_ASSERT(IsPowerOf2(MULTIPLE));
  return (value & (MULTIPLE - 1)) == 0;
}

template <typename T>
inline bool IsMultiple(T value, unsigned multiple) {
  VIXL_ASSERT(IsPowerOf2(multiple));
  return (value & (multiple - 1)) == 0;
}

template <typename T>
inline bool IsAligned(T pointer, int alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  return (pointer & (alignment - 1)) == 0;
}

// Pointer alignment
// TODO: rename/refactor to make it specific to instructions.
template <unsigned ALIGN, typename T>
inline bool IsAligned(T pointer) {
  VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));  // NOLINT(runtime/sizeof)
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.
  return IsAligned((intptr_t)(pointer), ALIGN);
}
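
// A few hand-evaluated data points for the bit utilities above (illustrative):
//
//   LowestSetBit(0b1100)                  == 0b0100
//   HighestSetBitPosition(UINT32_C(0x80)) == 7
//   SignExtend<int32_t>(0xff, 8)          == -1
//   IsMultiple<8>(24)                     == true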

template <typename T>
bool IsWordAligned(T pointer) {
  return IsAligned<4>(pointer);
}

// Increment a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignUp(T pointer,
          typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  T result = (T)((pointer_raw + mask) & ~mask);
  VIXL_ASSERT(result >= pointer);

  return result;
}

// Decrement a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignDown(T pointer,
            typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  return (T)(pointer_raw & ~mask);
}


template <typename T>
inline T ExtractBit(T value, unsigned bit) {
  return (value >> bit) & T(1);
}

template <typename Ts, typename Td>
inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
  return Td((value >> least_significant_bit) & Ts(mask));
}

template <typename Ts, typename Td>
inline void AssignBit(Td& dst,  // NOLINT(runtime/references)
                      int bit,
                      Ts value) {
  VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
  VIXL_ASSERT(bit >= 0);
  VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
  Td mask(1);
  dst &= ~(mask << bit);
  dst |= Td(value) << bit;
}

template <typename Td, typename Ts>
inline void AssignBits(Td& dst,  // NOLINT(runtime/references)
                       int least_significant_bit,
                       Ts mask,
                       Ts value) {
  VIXL_ASSERT(least_significant_bit >= 0);
  VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
  VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
              Td(mask));
  VIXL_ASSERT((value & mask) == value);
  dst &= ~(Td(mask) << least_significant_bit);
  dst |= Td(value) << least_significant_bit;
}

class VFP {
 public:
  static uint32_t FP32ToImm8(float imm) {
    // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bit7: a000.0000
    uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
    // bit6: 0b00.0000
    uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static uint32_t FP64ToImm8(double imm) {
    // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //       0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bit7: a000.0000
    uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
    // bit6: 0b00.0000
    uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint64_t bit5_to_0 = (bits >> 48) & 0x3f;

    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static float Imm8ToFP32(uint32_t imm8) {
    // Imm8: abcdefgh (8 bits)
    // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint32_t bit7 = (bits >> 7) & 0x1;
    uint32_t bit6 = (bits >> 6) & 0x1;
    uint32_t bit5_to_0 = bits & 0x3f;
    uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);

    return RawbitsToFloat(result);
  }
  static double Imm8ToFP64(uint32_t imm8) {
    // Imm8: abcdefgh (8 bits)
    // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint64_t bit7 = (bits >> 7) & 0x1;
    uint64_t bit6 = (bits >> 6) & 0x1;
    uint64_t bit5_to_0 = bits & 0x3f;
    uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
    return RawbitsToDouble(result);
  }
  static bool IsImmFP32(float imm) {
    // Valid values will have the form:
    //   aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bits[19..0] are cleared.
    if ((bits & 0x7ffff) != 0) {
      return false;
    }

    // bits[29..25] are all set or all cleared.
    uint32_t b_pattern = (bits >> 16) & 0x3e00;
    if (b_pattern != 0 && b_pattern != 0x3e00) {
      return false;
    }
    // bit[30] and bit[29] are opposite.
    if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
      return false;
    }
    return true;
  }
  static bool IsImmFP64(double imm) {
    // Valid values will have the form:
    //   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //   0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bits[47..0] are cleared.
    if ((bits & 0x0000ffffffffffff) != 0) {
      return false;
    }
    // bits[61..54] are all set or all cleared.
    uint32_t b_pattern = (bits >> 48) & 0x3fc0;
    if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
      return false;
    }
    // bit[62] and bit[61] are opposite.
    if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
      return false;
    }
    return true;
  }
};
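
// Sanity-check values for the FP immediate helpers above, hand-derived from
// the AArch64 FMOV (immediate) imm8 format:
//
//   VFP::FP32ToImm8(1.0f) == 0x70     VFP::Imm8ToFP32(0x70) == 1.0f
//   VFP::IsImmFP32(1.0f)  == true     VFP::IsImmFP32(0.1f)  == false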

class BitField {
  // ForEachBitHelper is a functor that will call
  //   bool ForEachBitHelper::execute(ElementType id) const
  // and expects a boolean in return indicating whether to continue (if true)
  // or stop (if false).
  // check_set will check if the bits are on (true) or off (false).
  template <typename ForEachBitHelper, bool check_set>
  bool ForEachBit(const ForEachBitHelper& helper) {
    for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
      if (bitfield_[i] == check_set)
        if (!helper.execute(i)) return false;
    }
    return true;
  }

 public:
#ifndef PANDA_BUILD
  explicit BitField(unsigned size) : bitfield_(size, 0) {}
#else
  explicit BitField(unsigned size) = delete;
  explicit BitField(PandaAllocator* allocator, unsigned size)
      : bitfield_(size, 0, AllocatorWrapper(allocator).Adapter()) {}
#endif

  void Set(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = true;
  }

  void Unset(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = false;
  }

  bool IsSet(int i) const { return bitfield_[i]; }

  // For each bit not set in the bitfield call the execute functor
  // execute.
  // ForEachBitSetHelper::execute returns true if the iteration through
  // the bits can continue, otherwise it will stop.
  //   struct ForEachBitSetHelper {
  //     bool execute(int /*id*/) { return false; }
  //   };
  template <typename ForEachBitNotSetHelper>
  bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
    return ForEachBit<ForEachBitNotSetHelper, false>(helper);
  }

  // For each bit set in the bitfield call the execute functor
  // execute.
  template <typename ForEachBitSetHelper>
  bool ForEachBitSet(const ForEachBitSetHelper& helper) {
    return ForEachBit<ForEachBitSetHelper, true>(helper);
  }

 private:
#ifndef PANDA_BUILD
  std::vector<bool> bitfield_;
#else
  Vector<bool> bitfield_;
#endif
};

namespace internal {

typedef int64_t Int64;
class Uint64;
class Uint128;

class Uint32 {
  uint32_t data_;

 public:
  // Unlike uint32_t, Uint32 has a default constructor.
  Uint32() { data_ = 0; }
  explicit Uint32(uint32_t data) : data_(data) {}
  inline explicit Uint32(Uint64 data);
  uint32_t Get() const { return data_; }
  template <int N>
  int32_t GetSigned() const {
    return ExtractSignedBitfield32(N - 1, 0, data_);
  }
  int32_t GetSigned() const { return data_; }
  Uint32 operator~() const { return Uint32(~data_); }
  Uint32 operator-() const { return Uint32(-data_); }
  bool operator==(Uint32 value) const { return data_ == value.data_; }
  bool operator!=(Uint32 value) const { return data_ != value.data_; }
  bool operator>(Uint32 value) const { return data_ > value.data_; }
  Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
  Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
  Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
  Uint32 operator&=(Uint32 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
  Uint32 operator^=(Uint32 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
  Uint32 operator|=(Uint32 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint32_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint32 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ >> tmp);
  }
  Uint32 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ << tmp);
  }
};
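
// The wrapped shifts deliberately differ from the built-in operators; two
// hand-evaluated examples of the semantics described above:
//
//   Uint32(0x80000000) >> 33 == Uint32(0)     // over-wide shifts produce 0
//   Uint32(0x1) >> -4        == Uint32(0x10)  // negative shifts reverse direction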

class Uint64 {
  uint64_t data_;

 public:
  // Unlike uint64_t, Uint64 has a default constructor.
  Uint64() { data_ = 0; }
  explicit Uint64(uint64_t data) : data_(data) {}
  explicit Uint64(Uint32 data) : data_(data.Get()) {}
  inline explicit Uint64(Uint128 data);
  uint64_t Get() const { return data_; }
  int64_t GetSigned(int N) const {
    return ExtractSignedBitfield64(N - 1, 0, data_);
  }
  int64_t GetSigned() const { return data_; }
  Uint32 ToUint32() const {
    VIXL_ASSERT((data_ >> 32) == 0);
    return Uint32(static_cast<uint32_t>(data_));
  }
  Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
  Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
  Uint64 operator~() const { return Uint64(~data_); }
  Uint64 operator-() const { return Uint64(-data_); }
  bool operator==(Uint64 value) const { return data_ == value.data_; }
  bool operator!=(Uint64 value) const { return data_ != value.data_; }
  Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
  Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
  Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
  Uint64 operator&=(Uint64 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
  Uint64 operator^=(Uint64 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
  Uint64 operator|=(Uint64 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint64_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint64 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ >> tmp);
  }
  Uint64 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ << tmp);
  }
};

class Uint128 {
  uint64_t data_high_;
  uint64_t data_low_;

 public:
  Uint128() : data_high_(0), data_low_(0) {}
  explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
  explicit Uint128(Uint64 data_low)
      : data_high_(0), data_low_(data_low.Get()) {}
  Uint128(uint64_t data_high, uint64_t data_low)
      : data_high_(data_high), data_low_(data_low) {}
  Uint64 ToUint64() const {
    VIXL_ASSERT(data_high_ == 0);
    return Uint64(data_low_);
  }
  Uint64 GetHigh64() const { return Uint64(data_high_); }
  Uint64 GetLow64() const { return Uint64(data_low_); }
  Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
  bool operator==(Uint128 value) const {
    return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
  }
  Uint128 operator&(Uint128 value) const {
    return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
  }
  Uint128 operator&=(Uint128 value) {
    data_high_ &= value.data_high_;
    data_low_ &= value.data_low_;
    return *this;
  }
  Uint128 operator|=(Uint128 value) {
    data_high_ |= value.data_high_;
    data_low_ |= value.data_low_;
    return *this;
  }
  Uint128 operator>>(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(0, data_high_ >> (shift - 64));
    }
    uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
    return Uint128(data_high_ >> shift, tmp);
  }
  Uint128 operator<<(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(data_low_ << (shift - 64), 0);
    }
    uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
    return Uint128(tmp, data_low_ << shift);
  }
};

Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}

Int64 BitCount(Uint32 value);

// The algorithm used is adapted from the one described in section 8.2 of
// Hacker's Delight, by Henry S. Warren, Jr.
template <unsigned N, typename T>
int64_t MultiplyHigh(T u, T v) {
  uint64_t u0, v0, w0, u1, v1, w1, w2, t;
  VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
  uint64_t sign_mask = UINT64_C(1) << (N - 1);
  uint64_t sign_ext = 0;
  unsigned half_bits = N / 2;
  uint64_t half_mask = GetUintMask(half_bits);
  if (std::numeric_limits<T>::is_signed) {
    sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
  }

  VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
  VIXL_ASSERT(sizeof(u) == sizeof(u0));

  u0 = u & half_mask;
  u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
  v0 = v & half_mask;
  v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);

  w0 = u0 * v0;
  t = u1 * v0 + (w0 >> half_bits);

  w1 = t & half_mask;
  w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
  w1 = u0 * v1 + w1;
  w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);

  uint64_t value = u1 * v1 + w2 + w1;
  int64_t result;
  memcpy(&result, &value, sizeof(result));
  return result;
}
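
// One hand-checked data point for MultiplyHigh, taking it to return the high
// N bits of the full 2N-bit product (value worked out by hand, not taken from
// the test suite):
//
//   MultiplyHigh<32>(UINT64_C(0xffffffff), UINT64_C(0xffffffff)) == 0xfffffffe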

}  // namespace internal

// The default NaN values (for FPCR.DN=1).
extern const double kFP64DefaultNaN;
extern const float kFP32DefaultNaN;
extern const Float16 kFP16DefaultNaN;

// Floating-point infinity values.
extern const Float16 kFP16PositiveInfinity;
extern const Float16 kFP16NegativeInfinity;
extern const float kFP32PositiveInfinity;
extern const float kFP32NegativeInfinity;
extern const double kFP64PositiveInfinity;
extern const double kFP64NegativeInfinity;

// Floating-point zero values.
extern const Float16 kFP16PositiveZero;
extern const Float16 kFP16NegativeZero;

// AArch64 floating-point specifics. These match IEEE-754.
const unsigned kDoubleMantissaBits = 52;
const unsigned kDoubleExponentBits = 11;
const unsigned kFloatMantissaBits = 23;
const unsigned kFloatExponentBits = 8;
const unsigned kFloat16MantissaBits = 10;
const unsigned kFloat16ExponentBits = 5;

enum FPRounding {
  // The first four values are encodable directly by FPCR<RMode>.
  FPTieEven = 0x0,
  FPPositiveInfinity = 0x1,
  FPNegativeInfinity = 0x2,
  FPZero = 0x3,

  // The final rounding modes are only available when explicitly specified by
  // the instruction (such as with fcvta). They cannot be set in FPCR.
  FPTieAway,
  FPRoundOdd
};

enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };

// Assemble the specified IEEE-754 components into the target type and apply
// appropriate rounding.
//  sign:     0 = positive, 1 = negative
//  exponent: Unbiased IEEE-754 exponent.
//  mantissa: The mantissa of the input. The top bit (which is not encoded for
//            normal IEEE-754 values) must not be omitted. This bit has the
//            value 'pow(2, exponent)'.
//
// The input value is assumed to be a normalized value. That is, the input may
// not be infinity or NaN. If the source value is subnormal, it must be
// normalized before calling this function such that the highest set bit in the
// mantissa has the value 'pow(2, exponent)'.
//
// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
// calling a templated FPRound.
template <class T, int ebits, int mbits>
T FPRound(int64_t sign,
          int64_t exponent,
          uint64_t mantissa,
          FPRounding round_mode) {
  VIXL_ASSERT((sign == 0) || (sign == 1));

  // Only FPTieEven and FPRoundOdd rounding modes are implemented.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

  // Rounding can promote subnormals to normals, and normals to infinities. For
  // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
  // encodable as a float, but rounding based on the low-order mantissa bits
  // could make it overflow. With ties-to-even rounding, this value would
  // become an infinity.

  // ---- Rounding Method ----
  //
  // The exponent is irrelevant in the rounding operation, so we treat the
  // lowest-order bit that will fit into the result ('onebit') as having
  // the value '1'. Similarly, the highest-order bit that won't fit into
  // the result ('halfbit') has the value '0.5'. The 'point' sits between
  // 'onebit' and 'halfbit':
  //
  //            These bits fit into the result.
  //               |---------------------|
  //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  //                                     ||
  //                                    / |
  //                                   /  halfbit
  //                               onebit
  //
  // For subnormal outputs, the range of representable bits is smaller and
  // the position of onebit and halfbit depends on the exponent of the
  // input, but the method is otherwise similar.
  //
  //   onebit(frac)
  //     |
  //     | halfbit(frac)          halfbit(adjusted)
  //     | /                      /
  //     | |                      |
  //  0b00.0 (exact)  -> 0b00.0 (exact)        -> 0b00
  //  0b00.0...       -> 0b00.0...             -> 0b00
  //  0b00.1 (exact)  -> 0b00.0111..111        -> 0b00
  //  0b00.1...       -> 0b00.1...             -> 0b01
  //  0b01.0 (exact)  -> 0b01.0 (exact)        -> 0b01
  //  0b01.0...       -> 0b01.0...             -> 0b01
  //  0b01.1 (exact)  -> 0b01.1 (exact)        -> 0b10
  //  0b01.1...       -> 0b01.1...             -> 0b10
  //  0b10.0 (exact)  -> 0b10.0 (exact)        -> 0b10
  //  0b10.0...       -> 0b10.0...             -> 0b10
  //  0b10.1 (exact)  -> 0b10.0111..111        -> 0b10
  //  0b10.1...       -> 0b10.1...             -> 0b11
  //  0b11.0 (exact)  -> 0b11.0 (exact)        -> 0b11
  //  ...              /              |            /              |
  //                  /               |           /               |
  //                 /                |          /                |
  // adjusted = frac - (halfbit(mantissa) & ~onebit(frac));       |
  //                                                              |
  // mantissa = (mantissa >> shift) + halfbit(adjusted);

  static const int mantissa_offset = 0;
  static const int exponent_offset = mantissa_offset + mbits;
  static const int sign_offset = exponent_offset + ebits;
  VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));

  // Bail out early for zero inputs.
  if (mantissa == 0) {
    return static_cast<T>(sign << sign_offset);
  }

  // If all bits in the exponent are set, the value is infinite or NaN.
  // This is true for all binary IEEE-754 formats.
  static const int infinite_exponent = (1 << ebits) - 1;
  static const int max_normal_exponent = infinite_exponent - 1;

  // Apply the exponent bias to encode it for the result. Doing this early
  // makes it easy to detect values that will be infinite or subnormal.
  exponent += max_normal_exponent >> 1;

  if (exponent > max_normal_exponent) {
    // Overflow: the input is too large for the result type to represent.
    if (round_mode == FPTieEven) {
      // FPTieEven rounding mode handles overflows using infinities.
      exponent = infinite_exponent;
      mantissa = 0;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // FPRoundOdd rounding mode handles overflows using the largest magnitude
      // normal number.
      exponent = max_normal_exponent;
      mantissa = (UINT64_C(1) << exponent_offset) - 1;
    }
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          (mantissa << mantissa_offset));
  }

  // Calculate the shift required to move the top mantissa bit to the proper
  // place in the destination type.
  const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
  int shift = highest_significant_bit - mbits;

  if (exponent <= 0) {
    // The output will be subnormal (before rounding).
    // For subnormal outputs, the shift must be adjusted by the exponent. The +1
    // is necessary because the exponent of a subnormal value (encoded as 0) is
    // the same as the exponent of the smallest normal value (encoded as 1).
    shift += -exponent + 1;

    // Handle inputs that would produce a zero output.
    //
    // Shifts higher than highest_significant_bit+1 will always produce a zero
    // result. A shift of exactly highest_significant_bit+1 might produce a
    // non-zero result after rounding.
    if (shift > (highest_significant_bit + 1)) {
      if (round_mode == FPTieEven) {
        // The result will always be +/-0.0.
        return static_cast<T>(sign << sign_offset);
      } else {
        VIXL_ASSERT(round_mode == FPRoundOdd);
        VIXL_ASSERT(mantissa != 0);
        // For FPRoundOdd, if the mantissa is too small to represent and
        // non-zero return the next "odd" value.
        return static_cast<T>((sign << sign_offset) | 1);
      }
    }

    // Properly encode the exponent for a subnormal output.
    exponent = 0;
  } else {
    // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
    // normal values.
    mantissa &= ~(UINT64_C(1) << highest_significant_bit);
  }

  // The casts below are only well-defined for unsigned integers.
  VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
  VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);

  if (shift > 0) {
    if (round_mode == FPTieEven) {
      // We have to shift the mantissa to the right. Some precision is lost, so
      // we need to apply rounding.
      uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
      uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
      uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
      uint64_t adjusted = mantissa - adjustment;
      T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;

      T result =
          static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
                         ((mantissa >> shift) << mantissa_offset));

      // A very large mantissa can overflow during rounding. If this happens,
      // the exponent should be incremented and the mantissa set to 1.0
      // (encoded as 0). Applying halfbit_adjusted after assembling the float
      // has the nice side-effect that this case is handled for free.
      //
      // This also handles cases where a very large finite value overflows to
      // infinity, or where a very large subnormal value overflows to become
      // normal.
      return result + halfbit_adjusted;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // If any bits at position halfbit or below are set, onebit (ie. the
      // bottom bit of the resulting mantissa) must be set.
      uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
      if (fractional_bits != 0) {
        mantissa |= UINT64_C(1) << shift;
      }

      return static_cast<T>((sign << sign_offset) |
                            (exponent << exponent_offset) |
                            ((mantissa >> shift) << mantissa_offset));
    }
  } else {
    // We have to shift the mantissa to the left (or not at all). The input
    // mantissa is exactly representable in the output mantissa, so apply no
    // rounding correction.
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          ((mantissa << -shift) << mantissa_offset));
  }
}


// See FPRound for a description of this function.
inline double FPRoundToDouble(int64_t sign,
                              int64_t exponent,
                              uint64_t mantissa,
                              FPRounding round_mode) {
  uint64_t bits =
      FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
                                                                  exponent,
                                                                  mantissa,
                                                                  round_mode);
  return RawbitsToDouble(bits);
}


// See FPRound for a description of this function.
inline Float16 FPRoundToFloat16(int64_t sign,
                                int64_t exponent,
                                uint64_t mantissa,
                                FPRounding round_mode) {
  return RawbitsToFloat16(
      FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
          sign, exponent, mantissa, round_mode));
}
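
// A worked data point (hand-evaluated): the value 1.0 has sign 0, unbiased
// exponent 0, and a mantissa whose top bit carries pow(2, 0), so
//
//   Float16ToRawbits(FPRoundToFloat16(0, 0, UINT64_C(1) << 63, FPTieEven))
//
// yields 0x3c00, the half-precision encoding of 1.0.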

// See FPRound for a description of this function.
static inline float FPRoundToFloat(int64_t sign,
                                   int64_t exponent,
                                   uint64_t mantissa,
                                   FPRounding round_mode) {
  uint32_t bits =
      FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
                                                                exponent,
                                                                mantissa,
                                                                round_mode);
  return RawbitsToFloat(bits);
}


float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
float FPToFloat(double value,
                FPRounding round_mode,
                UseDefaultNaN DN,
                bool* exception = NULL);

double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);

Float16 FPToFloat16(float value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

// Like static_cast<T>(value), but with specialisations for the Float16 type.
template <typename T, typename F>
T StaticCastFPTo(F value) {
  return static_cast<T>(value);
}

template <>
inline float StaticCastFPTo<float, Float16>(Float16 value) {
  return FPToFloat(value, kIgnoreDefaultNaN);
}

template <>
inline double StaticCastFPTo<double, Float16>(Float16 value) {
  return FPToDouble(value, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, float>(float value) {
  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, double>(double value) {
  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <typename T>
uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
  switch (size_in_bits) {
    case 16:
      return Float16ToRawbits(StaticCastFPTo<Float16>(value));
    case 32:
      return FloatToRawbits(StaticCastFPTo<float>(value));
    case 64:
      return DoubleToRawbits(StaticCastFPTo<double>(value));
  }
  VIXL_UNREACHABLE();
  return 0;
}

template <typename T>
T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
  VIXL_ASSERT(IsUintN(size_in_bits, value));
  switch (size_in_bits) {
    case 16:
      return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
    case 32:
      return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
    case 64:
      return StaticCastFPTo<T>(RawbitsToDouble(value));
  }
  VIXL_UNREACHABLE();
  return 0;
}

// Jenkins one-at-a-time hash, based on
// https://en.wikipedia.org/wiki/Jenkins_hash_function citing
// https://www.drdobbs.com/database/algorithm-alley/184410284.
constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
  if (*str == '\0') {
    hash += hash << 3;
    hash ^= hash >> 11;
    hash += hash << 15;
    return hash;
  } else {
    hash += *str;
    hash += hash << 10;
    hash ^= hash >> 6;
    return Hash(str + 1, hash);
  }
}

constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }

}  // namespace vixl

#endif  // VIXL_UTILS_H