// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_UTILS_H
#define VIXL_UTILS_H

#include <cmath>
#include <cstring>
#include <limits>
#include <type_traits>
#include <vector>
#include <optional>

#include "compiler-intrinsics-vixl.h"
#include "globals-vixl.h"

#if defined(VIXL_USE_PANDA_ALLOC) && !defined(PANDA_BUILD)
#error "PANDA_BUILD should be defined for VIXL_USE_PANDA_ALLOC"
#endif

#ifdef VIXL_USE_PANDA_ALLOC
#include "mem/arena_allocator_stl_adapter.h"
#include "mem/arena_allocator.h"
#include "utils/arena_containers.h"
#else
#include <list>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#endif

#if defined(PANDA_BUILD) && !defined(VIXL_USE_PANDA_ALLOC)
namespace ark {
template <bool> class ArenaAllocatorT;
using ArenaAllocator = ArenaAllocatorT<false>;
}
#endif

namespace vixl {
#ifdef VIXL_USE_PANDA_ALLOC
template <typename T>
using List = ark::ArenaList<T>;

template <typename K, typename V>
using Map = ark::ArenaMap<K, V>;

template <typename K, typename V>
using UnorderedMap = ark::ArenaUnorderedMap<K, V>;

template <typename K>
using UnorderedSet = ark::ArenaUnorderedSet<K>;

using String = ark::ArenaString;

template <typename T>
using Vector = ark::ArenaVector<T>;
#else
template <typename T>
using List = std::list<T>;

template <typename K, typename V>
using Map = std::map<K, V>;

template <typename K, typename V>
using UnorderedMap = std::unordered_map<K, V>;

template <typename K>
using UnorderedSet = std::unordered_set<K>;

using String = std::string;

template <typename T>
using Vector = std::vector<T>;
#endif

#ifdef PANDA_BUILD
using PandaAllocator = ark::ArenaAllocator;
#endif  // PANDA_BUILD
template <typename T>
struct is_unbounded_array : public std::false_type {};

template <typename T>
struct is_unbounded_array<T[]> : public std::true_type {};

template <typename T>
constexpr bool is_unbounded_array_v = is_unbounded_array<T>::value;

class AllocatorWrapper {
 public:
#ifndef PANDA_BUILD
  AllocatorWrapper() = default;
#else  // PANDA_BUILD
  AllocatorWrapper([[maybe_unused]] PandaAllocator* allocator)
#ifdef VIXL_USE_PANDA_ALLOC
      : allocator_(allocator)
#endif
  {}
#endif  // PANDA_BUILD

  auto Adapter() {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->Adapter();
#else
    return std::allocator<void>();
#endif
  }

  template <typename T, typename... Args>
  [[nodiscard]] std::enable_if_t<!std::is_array_v<T>, T*> New(Args&&... args) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->template New<T>(std::forward<Args>(args)...);
#else
    return new T(std::forward<Args>(args)...);
#endif
  }

  template <typename T>
  [[nodiscard]] std::enable_if_t<is_unbounded_array_v<T>, std::remove_extent_t<T>*> New(size_t size) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->template New<T>(size);
#else
    return new std::remove_extent_t<T>[size];
#endif
  }

  [[nodiscard]] void* Alloc(size_t size) {
#ifdef VIXL_USE_PANDA_ALLOC
    return allocator_->Alloc(size);
#else
    return malloc(size);
#endif
  }

  template <typename T>
  void DeleteObject([[maybe_unused]] T* obj) {
#ifndef VIXL_USE_PANDA_ALLOC
    delete obj;
#endif
  }

  template <typename T>
  void DeleteArray([[maybe_unused]] T* arr) {
#ifndef VIXL_USE_PANDA_ALLOC
    delete[] arr;
#endif
  }

  void Free([[maybe_unused]] void* ptr) {
#ifndef VIXL_USE_PANDA_ALLOC
    free(ptr);
#endif
  }

 private:
#ifdef VIXL_USE_PANDA_ALLOC
  PandaAllocator* allocator_;
#endif
};

// Macros for compile-time format checking.
#if GCC_VERSION_OR_NEWER(4, 4, 0)
#define PRINTF_CHECK(format_index, varargs_index) \
  __attribute__((format(gnu_printf, format_index, varargs_index)))
#else
#define PRINTF_CHECK(format_index, varargs_index)
#endif

#ifdef __GNUC__
#define VIXL_HAS_DEPRECATED_WITH_MSG
#elif defined(__clang__)
#ifdef __has_extension(attribute_deprecated_with_message)
#define VIXL_HAS_DEPRECATED_WITH_MSG
#endif
#endif

#ifdef VIXL_HAS_DEPRECATED_WITH_MSG
#define VIXL_DEPRECATED(replaced_by, declarator) \
  __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
#else
#define VIXL_DEPRECATED(replaced_by, declarator) declarator
#endif

#ifdef VIXL_DEBUG
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
#else
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
#endif

template <typename T, size_t n>
constexpr size_t ArrayLength(const T (&)[n]) {
  return n;
}

inline uint64_t GetUintMask(unsigned bits) {
  VIXL_ASSERT(bits <= 64);
  uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits);
  return base - 1;
}

inline uint64_t GetSignMask(unsigned bits) {
  VIXL_ASSERT(bits <= 64);
  return UINT64_C(1) << (bits - 1);
}
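// Added illustration (not part of the original header): concrete values for
// the two mask helpers above, assuming the definitions immediately preceding
// this comment.
//
//   GetUintMask(8)  == UINT64_C(0xff)                // low 8 bits set
//   GetUintMask(64) == UINT64_C(0xffffffffffffffff)  // all 64 bits set
//   GetSignMask(8)  == UINT64_C(0x80)                // sign bit of an 8-bit value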
// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
}
inline bool IsIntN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  if (n == 32) return true;
  int32_t limit = INT32_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
inline bool IsIntN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
}
inline bool IsIntN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  if (n == 64) return true;
  int64_t limit = INT64_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
  return IsIntN(n, x);
}

inline bool IsUintN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n <= 32));
  if (n >= 32) return true;
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint32_t>(x) >> n);
}
inline bool IsUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n <= 64));
  if (n >= 64) return true;
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint64_t>(x) >> n);
}
VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
  return IsUintN(n, x);
}

inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
}
VIXL_DEPRECATED("TruncateToUintN",
                inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
  return TruncateToUintN(n, x);
}

// clang-format off
#define INT_1_TO_32_LIST(V)                     \
V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)  \
V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16) \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)

#define INT_33_TO_63_LIST(V)                    \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)

#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)

// clang-format on

#define DECLARE_IS_INT_N(N)                                       \
  inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }        \
  VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
    return IsIntN(N, x);                                          \
  }

#define DECLARE_IS_UINT_N(N)                                        \
  inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }        \
  VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
    return IsUintN(N, x);                                           \
  }

#define DECLARE_TRUNCATE_TO_UINT_32(N)                             \
  inline uint32_t TruncateToUint##N(uint64_t x) {                  \
    return static_cast<uint32_t>(TruncateToUintN(N, x));           \
  }                                                                \
  VIXL_DEPRECATED("TruncateToUint" #N,                             \
                  inline uint32_t truncate_to_int##N(int64_t x)) { \
    return TruncateToUint##N(x);                                   \
  }

INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)

#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N
#undef DECLARE_TRUNCATE_TO_UINT_32
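// Added illustration (not part of the original header): the macro lists above
// expand to helpers such as IsInt8(), IsUint8(), ..., IsInt63(), IsUint63()
// and TruncateToUint1() ... TruncateToUint32(). Assuming those expansions:
//
//   IsUint8(255);                             // true
//   IsUint8(256);                             // false
//   IsInt8(-128);                             // true
//   TruncateToUint16(UINT64_C(0x12345678));   // 0x5678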

// Bit field extraction.
inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  if ((msb == 63) && (lsb == 0)) return x;
  return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
}


inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
}


inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
  // If the highest extracted bit is set, sign extend.
  if ((temp >> (msb - lsb)) == 1) {
    temp |= ~UINT64_C(0) << (msb - lsb);
  }
  int64_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}

inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
  int32_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}

inline uint64_t RotateRight(uint64_t value,
                            unsigned int rotate,
                            unsigned int width) {
  VIXL_ASSERT((width > 0) && (width <= 64));
  uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
  rotate &= 63;
  if (rotate > 0) {
    value &= width_mask;
    value = (value << (width - rotate)) | (value >> rotate);
  }
  return value & width_mask;
}


// Wrapper class for passing FP16 values through the assembler.
// This is purely to aid with type checking/casting.
class Float16 {
 public:
  explicit Float16(double dvalue);
  Float16() : rawbits_(0x0) {}
  friend uint16_t Float16ToRawbits(Float16 value);
  friend Float16 RawbitsToFloat16(uint16_t bits);

 protected:
  uint16_t rawbits_;
};

// Floating point representation.
uint16_t Float16ToRawbits(Float16 value);


uint32_t FloatToRawbits(float value);
VIXL_DEPRECATED("FloatToRawbits",
                inline uint32_t float_to_rawbits(float value)) {
  return FloatToRawbits(value);
}

uint64_t DoubleToRawbits(double value);
VIXL_DEPRECATED("DoubleToRawbits",
                inline uint64_t double_to_rawbits(double value)) {
  return DoubleToRawbits(value);
}

Float16 RawbitsToFloat16(uint16_t bits);

float RawbitsToFloat(uint32_t bits);
VIXL_DEPRECATED("RawbitsToFloat",
                inline float rawbits_to_float(uint32_t bits)) {
  return RawbitsToFloat(bits);
}

double RawbitsToDouble(uint64_t bits);
VIXL_DEPRECATED("RawbitsToDouble",
                inline double rawbits_to_double(uint64_t bits)) {
  return RawbitsToDouble(bits);
}

// Some compilers dislike negating unsigned integers,
// so we provide an equivalent.
template <typename T>
T UnsignedNegate(T value) {
  VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
  return ~value + 1;
}
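// Added illustration (not part of the original header): UnsignedNegate()
// computes the two's-complement negation of an unsigned value without
// triggering "negating an unsigned type" compiler warnings.
//
//   UnsignedNegate(uint32_t{1});   // 0xffffffff
//   UnsignedNegate(UINT64_C(0));   // 0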
// An absolute operation for signed integers that is defined for results outside
// the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
template <typename T>
T Abs(T val) {
  // TODO: this static assertion is for signed integer inputs, as that's the
  // only type tested. However, the code should work for all numeric inputs.
  // Remove the assertion and this comment when more tests are available.
  VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
  return ((val >= -std::numeric_limits<T>::max()) && (val < 0)) ? -val : val;
}

// Convert unsigned to signed numbers in a well-defined way (using two's
// complement representations).
inline int64_t RawbitsToInt64(uint64_t bits) {
  return (bits >= UINT64_C(0x8000000000000000))
             ? (-static_cast<int64_t>(UnsignedNegate(bits) - 1) - 1)
             : static_cast<int64_t>(bits);
}

inline int32_t RawbitsToInt32(uint32_t bits) {
  return (bits >= UINT64_C(0x80000000))
             ? (-static_cast<int32_t>(UnsignedNegate(bits) - 1) - 1)
             : static_cast<int32_t>(bits);
}

namespace internal {

// Internal simulation class used solely by the simulator to
// provide an abstraction layer for any half-precision arithmetic.
class SimFloat16 : public Float16 {
 public:
  // TODO: We should investigate making this constructor explicit.
  // This is currently difficult to do due to a number of templated
  // functions in the simulator which rely on returning double values.
  SimFloat16(double dvalue) : Float16(dvalue) {}  // NOLINT(runtime/explicit)
  SimFloat16(Float16 f) {  // NOLINT(runtime/explicit)
    this->rawbits_ = Float16ToRawbits(f);
  }
  SimFloat16() : Float16() {}
  SimFloat16 operator-() const;
  SimFloat16 operator+(SimFloat16 rhs) const;
  SimFloat16 operator-(SimFloat16 rhs) const;
  SimFloat16 operator*(SimFloat16 rhs) const;
  SimFloat16 operator/(SimFloat16 rhs) const;
  bool operator<(SimFloat16 rhs) const;
  bool operator>(SimFloat16 rhs) const;
  bool operator==(SimFloat16 rhs) const;
  bool operator!=(SimFloat16 rhs) const;
  // This is necessary for conversions performed in (macro asm) Fmov.
  bool operator==(double rhs) const;
  operator double() const;
};
}  // namespace internal

uint32_t Float16Sign(internal::SimFloat16 value);

uint32_t Float16Exp(internal::SimFloat16 value);

uint32_t Float16Mantissa(internal::SimFloat16 value);

uint32_t FloatSign(float value);
VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
  return FloatSign(value);
}

uint32_t FloatExp(float value);
VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
  return FloatExp(value);
}

uint32_t FloatMantissa(float value);
VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
  return FloatMantissa(value);
}

uint32_t DoubleSign(double value);
VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
  return DoubleSign(value);
}

uint32_t DoubleExp(double value);
VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
  return DoubleExp(value);
}

uint64_t DoubleMantissa(double value);
VIXL_DEPRECATED("DoubleMantissa",
                inline uint64_t double_mantissa(double value)) {
  return DoubleMantissa(value);
}

internal::SimFloat16 Float16Pack(uint16_t sign,
                                 uint16_t exp,
                                 uint16_t mantissa);

float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
VIXL_DEPRECATED("FloatPack",
                inline float float_pack(uint32_t sign,
                                        uint32_t exp,
                                        uint32_t mantissa)) {
  return FloatPack(sign, exp, mantissa);
}

double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
VIXL_DEPRECATED("DoublePack",
                inline double double_pack(uint32_t sign,
                                          uint32_t exp,
                                          uint64_t mantissa)) {
  return DoublePack(sign, exp, mantissa);
}

// An fpclassify() function for 16-bit half-precision floats.
int Float16Classify(Float16 value);
VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
  return Float16Classify(RawbitsToFloat16(value));
}

bool IsZero(Float16 value);

inline bool IsPositiveZero(double value) {
  return (value == 0.0) && (copysign(1.0, value) > 0.0);
}

inline bool IsNaN(float value) { return std::isnan(value); }

inline bool IsNaN(double value) { return std::isnan(value); }

inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }

inline bool IsInf(float value) { return std::isinf(value); }

inline bool IsInf(double value) { return std::isinf(value); }

inline bool IsInf(Float16 value) {
  return Float16Classify(value) == FP_INFINITE;
}
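// Added note (not part of the original header): assuming the usual raw-field
// semantics of the accessors and Pack helpers above, extracting and repacking
// the fields of a finite value is expected to round-trip, e.g.
//
//   double d = 1.5;
//   DoublePack(DoubleSign(d), DoubleExp(d), DoubleMantissa(d)) == d;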

// NaN tests.
inline bool IsSignallingNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  uint64_t raw = DoubleToRawbits(num);
  if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw = FloatToRawbits(num);
  if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(Float16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
}


template <typename T>
inline bool IsQuietNaN(T num) {
  return IsNaN(num) && !IsSignallingNaN(num);
}


// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
}


inline float ToQuietNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
}


inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  VIXL_ASSERT(IsNaN(num));
  return internal::SimFloat16(
      RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
}


// Fused multiply-add.
inline double FusedMultiplyAdd(double op1, double op2, double a) {
  return fma(op1, op2, a);
}


inline float FusedMultiplyAdd(float op1, float op2, float a) {
  return fmaf(op1, op2, a);
}


inline uint64_t LowestSetBit(uint64_t value) {
  return value & UnsignedNegate(value);
}


template <typename T>
inline int HighestSetBitPosition(T value) {
  VIXL_ASSERT(value != 0);
  return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
}


template <typename V>
inline int WhichPowerOf2(V value) {
  VIXL_ASSERT(IsPowerOf2(value));
  return CountTrailingZeros(value);
}


unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);


int BitCount(uint64_t value);


template <typename T>
T ReverseBits(T value) {
  VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
              (sizeof(value) == 4) || (sizeof(value) == 8));
  T result = 0;
  for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
    result = (result << 1) | (value & 1);
    value >>= 1;
  }
  return result;
}


template <typename T>
inline T SignExtend(T val, int size_in_bits) {
  VIXL_ASSERT(size_in_bits > 0);
  T mask = (T(2) << (size_in_bits - 1)) - T(1);
  val &= mask;
  T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
  val |= sign_bits;
  return val;
}
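// Added illustration (not part of the original header): SignExtend() treats
// bit (size_in_bits - 1) as the sign bit of the low field.
//
//   SignExtend(int64_t{0xff}, 8);   // -1
//   SignExtend(int64_t{0x7f}, 8);   // 127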

template <typename T>
T ReverseBytes(T value, int block_bytes_log2) {
  VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
  VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
  // Split the 64-bit value into an 8-bit array, where b[0] is the least
  // significant byte, and b[7] is the most significant.
  uint8_t bytes[8];
  uint64_t mask = UINT64_C(0xff00000000000000);
  for (int i = 7; i >= 0; i--) {
    bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
    mask >>= 8;
  }

  // Permutation tables for REV instructions.
  //  permute_table[0] is used by REV16_x, REV16_w
  //  permute_table[1] is used by REV32_x, REV_w
  //  permute_table[2] is used by REV_x
  VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
  static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
                                              {4, 5, 6, 7, 0, 1, 2, 3},
                                              {0, 1, 2, 3, 4, 5, 6, 7}};
  uint64_t temp = 0;
  for (int i = 0; i < 8; i++) {
    temp <<= 8;
    temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
  }

  T result;
  VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
  memcpy(&result, &temp, sizeof(result));
  return result;
}

template <unsigned MULTIPLE, typename T>
inline bool IsMultiple(T value) {
  VIXL_ASSERT(IsPowerOf2(MULTIPLE));
  return (value & (MULTIPLE - 1)) == 0;
}

template <typename T>
inline bool IsMultiple(T value, unsigned multiple) {
  VIXL_ASSERT(IsPowerOf2(multiple));
  return (value & (multiple - 1)) == 0;
}

template <typename T>
inline bool IsAligned(T pointer, int alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  return (pointer & (alignment - 1)) == 0;
}

// Pointer alignment
// TODO: rename/refactor to make it specific to instructions.
template <unsigned ALIGN, typename T>
inline bool IsAligned(T pointer) {
  VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));  // NOLINT(runtime/sizeof)
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.
  return IsAligned((intptr_t)(pointer), ALIGN);
}

template <typename T>
bool IsWordAligned(T pointer) {
  return IsAligned<4>(pointer);
}

// Increment a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignUp(T pointer,
          typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  T result = (T)((pointer_raw + mask) & ~mask);
  VIXL_ASSERT(result >= pointer);

  return result;
}
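// Added illustration (not part of the original header): AlignUp() above and
// AlignDown() below, used with an integral "pointer".
//
//   AlignUp(UINT64_C(13), 8);     // 16
//   AlignDown(UINT64_C(13), 8);   // 8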
// Decrement a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignDown(T pointer,
            typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  return (T)(pointer_raw & ~mask);
}


template <typename T>
inline T ExtractBit(T value, unsigned bit) {
  return (value >> bit) & T(1);
}

template <typename Ts, typename Td>
inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
  return Td((value >> least_significant_bit) & Ts(mask));
}

template <typename Ts, typename Td>
inline void AssignBit(Td& dst,  // NOLINT(runtime/references)
                      int bit,
                      Ts value) {
  VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
  VIXL_ASSERT(bit >= 0);
  VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
  Td mask(1);
  dst &= ~(mask << bit);
  dst |= Td(value) << bit;
}

template <typename Td, typename Ts>
inline void AssignBits(Td& dst,  // NOLINT(runtime/references)
                       int least_significant_bit,
                       Ts mask,
                       Ts value) {
  VIXL_ASSERT(least_significant_bit >= 0);
  VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
  VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
              Td(mask));
  VIXL_ASSERT((value & mask) == value);
  dst &= ~(Td(mask) << least_significant_bit);
  dst |= Td(value) << least_significant_bit;
}

class VFP {
 public:
  static uint32_t FP32ToImm8(float imm) {
    // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bit7: a000.0000
    uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
    // bit6: 0b00.0000
    uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static uint32_t FP64ToImm8(double imm) {
    // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //       0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bit7: a000.0000
    uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
    // bit6: 0b00.0000
    uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint64_t bit5_to_0 = (bits >> 48) & 0x3f;

    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static float Imm8ToFP32(uint32_t imm8) {
    // Imm8: abcdefgh (8 bits)
    // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint32_t bit7 = (bits >> 7) & 0x1;
    uint32_t bit6 = (bits >> 6) & 0x1;
    uint32_t bit5_to_0 = bits & 0x3f;
    uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);

    return RawbitsToFloat(result);
  }
  static double Imm8ToFP64(uint32_t imm8) {
    // Imm8: abcdefgh (8 bits)
    // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint64_t bit7 = (bits >> 7) & 0x1;
    uint64_t bit6 = (bits >> 6) & 0x1;
    uint64_t bit5_to_0 = bits & 0x3f;
    uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
    return RawbitsToDouble(result);
  }
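  // Added illustration (not part of the original header): the AArch64 FP
  // immediate encoding maps 1.0f to imm8 == 0x70, so the helpers above are
  // expected to round-trip as follows.
  //
  //   FP32ToImm8(1.0f) == 0x70
  //   Imm8ToFP32(0x70) == 1.0f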
  static bool IsImmFP32(float imm) {
    // Valid values will have the form:
    //   aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bits[18..0] are cleared.
    if ((bits & 0x7ffff) != 0) {
      return false;
    }

    // bits[29..25] are all set or all cleared.
    uint32_t b_pattern = (bits >> 16) & 0x3e00;
    if (b_pattern != 0 && b_pattern != 0x3e00) {
      return false;
    }
    // bit[30] and bit[29] are opposite.
    if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
      return false;
    }
    return true;
  }
  static bool IsImmFP64(double imm) {
    // Valid values will have the form:
    //   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //   0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bits[47..0] are cleared.
    if ((bits & 0x0000ffffffffffff) != 0) {
      return false;
    }
    // bits[61..54] are all set or all cleared.
    uint32_t b_pattern = (bits >> 48) & 0x3fc0;
    if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
      return false;
    }
    // bit[62] and bit[61] are opposite.
    if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
      return false;
    }
    return true;
  }
};

class BitField {
  // ForEachBitHelper is a functor that will call
  //   bool ForEachBitHelper::execute(ElementType id) const
  // and expects a boolean in return whether to continue (if true)
  // or stop (if false).
  // check_set will check if the bits are on (true) or off (false).
  template <typename ForEachBitHelper, bool check_set>
  bool ForEachBit(const ForEachBitHelper& helper) {
    for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
      if (bitfield_[i] == check_set)
        if (!helper.execute(i)) return false;
    }
    return true;
  }

 public:
#ifndef PANDA_BUILD
  explicit BitField(unsigned size) : bitfield_(size, 0) {}
#else
  explicit BitField(unsigned size) = delete;
  explicit BitField(PandaAllocator* allocator, unsigned size)
      : bitfield_(size, 0, AllocatorWrapper(allocator).Adapter()) {}
#endif

  void Set(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = true;
  }

  void Unset(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = false;
  }

  bool IsSet(int i) const { return bitfield_[i]; }

  // For each bit not set in the bitfield call the execute functor
  // execute.
  // ForEachBitSetHelper::execute returns true if the iteration through
  // the bits can continue, otherwise it will stop.
  // struct ForEachBitSetHelper {
  //   bool execute(int /*id*/) { return false; }
  // };
  template <typename ForEachBitNotSetHelper>
  bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
    return ForEachBit<ForEachBitNotSetHelper, false>(helper);
  }

  // For each bit set in the bitfield call the execute functor
  // execute.
  template <typename ForEachBitSetHelper>
  bool ForEachBitSet(const ForEachBitSetHelper& helper) {
    return ForEachBit<ForEachBitSetHelper, true>(helper);
  }

 private:
#ifndef PANDA_BUILD
  std::vector<bool> bitfield_;
#else
  Vector<bool> bitfield_;
#endif
};

namespace internal {

typedef int64_t Int64;
class Uint64;
class Uint128;

class Uint32 {
  uint32_t data_;

 public:
  // Unlike uint32_t, Uint32 has a default constructor.
  Uint32() { data_ = 0; }
  explicit Uint32(uint32_t data) : data_(data) {}
  inline explicit Uint32(Uint64 data);
  uint32_t Get() const { return data_; }
  template <int N>
  int32_t GetSigned() const {
    return ExtractSignedBitfield32(N - 1, 0, data_);
  }
  int32_t GetSigned() const { return data_; }
  Uint32 operator~() const { return Uint32(~data_); }
  Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); }
  bool operator==(Uint32 value) const { return data_ == value.data_; }
  bool operator!=(Uint32 value) const { return data_ != value.data_; }
  bool operator>(Uint32 value) const { return data_ > value.data_; }
  Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
  Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
  Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
  Uint32 operator&=(Uint32 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
  Uint32 operator^=(Uint32 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
  Uint32 operator|=(Uint32 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint32_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint32 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ >> tmp);
  }
  Uint32 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ << tmp);
  }
};
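// Added illustration (not part of the original header): unlike raw uint32_t
// shifts, out-of-range and negative shift amounts are well defined for the
// class above.
//
//   Uint32(1) << 32;   // Uint32(0) rather than undefined behaviour
//   Uint32(1) >> -4;   // Uint32(16); a negative shift reverses direction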
class Uint64 {
  uint64_t data_;

 public:
  // Unlike uint64_t, Uint64 has a default constructor.
  Uint64() { data_ = 0; }
  explicit Uint64(uint64_t data) : data_(data) {}
  explicit Uint64(Uint32 data) : data_(data.Get()) {}
  inline explicit Uint64(Uint128 data);
  uint64_t Get() const { return data_; }
  int64_t GetSigned(int N) const {
    return ExtractSignedBitfield64(N - 1, 0, data_);
  }
  int64_t GetSigned() const { return data_; }
  Uint32 ToUint32() const {
    VIXL_ASSERT((data_ >> 32) == 0);
    return Uint32(static_cast<uint32_t>(data_));
  }
  Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
  Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
  Uint64 operator~() const { return Uint64(~data_); }
  Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); }
  bool operator==(Uint64 value) const { return data_ == value.data_; }
  bool operator!=(Uint64 value) const { return data_ != value.data_; }
  Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
  Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
  Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
  Uint64 operator&=(Uint64 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
  Uint64 operator^=(Uint64 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
  Uint64 operator|=(Uint64 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint64_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint64 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ >> tmp);
  }
  Uint64 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ << tmp);
  }
};

class Uint128 {
  uint64_t data_high_;
  uint64_t data_low_;

 public:
  Uint128() : data_high_(0), data_low_(0) {}
  explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
  explicit Uint128(Uint64 data_low)
      : data_high_(0), data_low_(data_low.Get()) {}
  Uint128(uint64_t data_high, uint64_t data_low)
      : data_high_(data_high), data_low_(data_low) {}
  Uint64 ToUint64() const {
    VIXL_ASSERT(data_high_ == 0);
    return Uint64(data_low_);
  }
  Uint64 GetHigh64() const { return Uint64(data_high_); }
  Uint64 GetLow64() const { return Uint64(data_low_); }
  Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
  bool operator==(Uint128 value) const {
    return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
  }
  Uint128 operator&(Uint128 value) const {
    return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
  }
  Uint128 operator&=(Uint128 value) {
    data_high_ &= value.data_high_;
    data_low_ &= value.data_low_;
    return *this;
  }
  Uint128 operator|=(Uint128 value) {
    data_high_ |= value.data_high_;
    data_low_ |= value.data_low_;
    return *this;
  }
  Uint128 operator>>(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(0, data_high_ >> (shift - 64));
    }
    uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
    return Uint128(data_high_ >> shift, tmp);
  }
  Uint128 operator<<(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(data_low_ << (shift - 64), 0);
    }
    uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
    return Uint128(tmp, data_low_ << shift);
  }
};

Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}

Int64 BitCount(Uint32 value);

// The algorithm used is adapted from the one described in section 8.2 of
// Hacker's Delight, by Henry S. Warren, Jr.
template <unsigned N, typename T>
int64_t MultiplyHigh(T u, T v) {
  uint64_t u0, v0, w0, u1, v1, w1, w2, t;
  VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
  uint64_t sign_mask = UINT64_C(1) << (N - 1);
  uint64_t sign_ext = 0;
  unsigned half_bits = N / 2;
  uint64_t half_mask = GetUintMask(half_bits);
  if (std::numeric_limits<T>::is_signed) {
    sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
  }

  VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
  VIXL_ASSERT(sizeof(u) == sizeof(u0));

  u0 = u & half_mask;
  u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
  v0 = v & half_mask;
  v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);

  w0 = u0 * v0;
  t = u1 * v0 + (w0 >> half_bits);

  w1 = t & half_mask;
  w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
  w1 = u0 * v1 + w1;
  w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);

  uint64_t value = u1 * v1 + w2 + w1;
  int64_t result;
  memcpy(&result, &value, sizeof(result));
  return result;
}

}  // namespace internal

// The default NaN values (for FPCR.DN=1).
extern const double kFP64DefaultNaN;
extern const float kFP32DefaultNaN;
extern const Float16 kFP16DefaultNaN;

// Floating-point infinity values.
extern const Float16 kFP16PositiveInfinity;
extern const Float16 kFP16NegativeInfinity;
extern const float kFP32PositiveInfinity;
extern const float kFP32NegativeInfinity;
extern const double kFP64PositiveInfinity;
extern const double kFP64NegativeInfinity;

// Floating-point zero values.
extern const Float16 kFP16PositiveZero;
extern const Float16 kFP16NegativeZero;

// AArch64 floating-point specifics. These match IEEE-754.
const unsigned kDoubleMantissaBits = 52;
const unsigned kDoubleExponentBits = 11;
const unsigned kFloatMantissaBits = 23;
const unsigned kFloatExponentBits = 8;
const unsigned kFloat16MantissaBits = 10;
const unsigned kFloat16ExponentBits = 5;
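// Added note (not part of the original header): for each format, one sign bit
// plus the widths above account for the full storage width, e.g.
//   1 + kDoubleExponentBits + kDoubleMantissaBits == 64
//   1 + kFloatExponentBits + kFloatMantissaBits == 32
//   1 + kFloat16ExponentBits + kFloat16MantissaBits == 16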

enum FPRounding {
  // The first four values are encodable directly by FPCR<RMode>.
  FPTieEven = 0x0,
  FPPositiveInfinity = 0x1,
  FPNegativeInfinity = 0x2,
  FPZero = 0x3,

  // The final rounding modes are only available when explicitly specified by
  // the instruction (such as with fcvta). They cannot be set in FPCR.
  FPTieAway,
  FPRoundOdd
};

enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };

// Assemble the specified IEEE-754 components into the target type and apply
// appropriate rounding.
//  sign:     0 = positive, 1 = negative
//  exponent: Unbiased IEEE-754 exponent.
//  mantissa: The mantissa of the input. The top bit (which is not encoded for
//            normal IEEE-754 values) must not be omitted. This bit has the
//            value 'pow(2, exponent)'.
//
// The input value is assumed to be a normalized value. That is, the input may
// not be infinity or NaN. If the source value is subnormal, it must be
// normalized before calling this function such that the highest set bit in the
// mantissa has the value 'pow(2, exponent)'.
//
// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
// calling a templated FPRound.
template <class T, int ebits, int mbits>
T FPRound(int64_t sign,
          int64_t exponent,
          uint64_t mantissa,
          FPRounding round_mode) {
  VIXL_ASSERT((sign == 0) || (sign == 1));

  // Only FPTieEven and FPRoundOdd rounding modes are implemented.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

  // Rounding can promote subnormals to normals, and normals to infinities. For
  // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
  // encodable as a float, but rounding based on the low-order mantissa bits
  // could make it overflow. With ties-to-even rounding, this value would
  // become an infinity.

  // ---- Rounding Method ----
  //
  // The exponent is irrelevant in the rounding operation, so we treat the
  // lowest-order bit that will fit into the result ('onebit') as having
  // the value '1'. Similarly, the highest-order bit that won't fit into
  // the result ('halfbit') has the value '0.5'. The 'point' sits between
  // 'onebit' and 'halfbit':
  //
  //            These bits fit into the result.
  //               |---------------------|
  //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  //                                     ||
  //                                    / |
  //                                   /  halfbit
  //                               onebit
  //
  // For subnormal outputs, the range of representable bits is smaller and
  // the position of onebit and halfbit depends on the exponent of the
  // input, but the method is otherwise similar.
  //
  //  onebit(frac)
  //    |
  //    | halfbit(frac)          halfbit(adjusted)
  //    | /                      /
  //    | |                      |
  //  0b00.0 (exact)      -> 0b00.0 (exact)      -> 0b00
  //  0b00.0...           -> 0b00.0...           -> 0b00
  //  0b00.1 (exact)      -> 0b00.0111..111      -> 0b00
  //  0b00.1...           -> 0b00.1...           -> 0b01
  //  0b01.0 (exact)      -> 0b01.0 (exact)      -> 0b01
  //  0b01.0...           -> 0b01.0...           -> 0b01
  //  0b01.1 (exact)      -> 0b01.1 (exact)      -> 0b10
  //  0b01.1...           -> 0b01.1...           -> 0b10
  //  0b10.0 (exact)      -> 0b10.0 (exact)      -> 0b10
  //  0b10.0...           -> 0b10.0...           -> 0b10
  //  0b10.1 (exact)      -> 0b10.0111..111      -> 0b10
  //  0b10.1...           -> 0b10.1...           -> 0b11
  //  0b11.0 (exact)      -> 0b11.0 (exact)      -> 0b11
  //  ...
  //
  //  adjusted = frac - (halfbit(mantissa) & ~onebit(frac));
  //  mantissa = (mantissa >> shift) + halfbit(adjusted);

  static const int mantissa_offset = 0;
  static const int exponent_offset = mantissa_offset + mbits;
  static const int sign_offset = exponent_offset + ebits;
  VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));

  // Bail out early for zero inputs.
  if (mantissa == 0) {
    return static_cast<T>(sign << sign_offset);
  }

  // If all bits in the exponent are set, the value is infinite or NaN.
  // This is true for all binary IEEE-754 formats.
  static const int infinite_exponent = (1 << ebits) - 1;
  static const int max_normal_exponent = infinite_exponent - 1;

  // Apply the exponent bias to encode it for the result. Doing this early
  // makes it easy to detect values that will be infinite or subnormal.
  exponent += max_normal_exponent >> 1;

  if (exponent > max_normal_exponent) {
    // Overflow: the input is too large for the result type to represent.
    if (round_mode == FPTieEven) {
      // FPTieEven rounding mode handles overflows using infinities.
      exponent = infinite_exponent;
      mantissa = 0;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // FPRoundOdd rounding mode handles overflows using the largest magnitude
      // normal number.
      exponent = max_normal_exponent;
      mantissa = (UINT64_C(1) << exponent_offset) - 1;
    }
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          (mantissa << mantissa_offset));
  }

  // Calculate the shift required to move the top mantissa bit to the proper
  // place in the destination type.
  const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
  int shift = highest_significant_bit - mbits;

  if (exponent <= 0) {
    // The output will be subnormal (before rounding).
    // For subnormal outputs, the shift must be adjusted by the exponent. The
    // +1 is necessary because the exponent of a subnormal value (encoded as 0)
    // is the same as the exponent of the smallest normal value (encoded as 1).
    shift += static_cast<int>(-exponent + 1);

    // Handle inputs that would produce a zero output.
    //
    // Shifts higher than highest_significant_bit+1 will always produce a zero
    // result. A shift of exactly highest_significant_bit+1 might produce a
    // non-zero result after rounding.
    if (shift > (highest_significant_bit + 1)) {
      if (round_mode == FPTieEven) {
        // The result will always be +/-0.0.
        return static_cast<T>(sign << sign_offset);
      } else {
        VIXL_ASSERT(round_mode == FPRoundOdd);
        VIXL_ASSERT(mantissa != 0);
        // For FPRoundOdd, if the mantissa is too small to represent and
        // non-zero return the next "odd" value.
        return static_cast<T>((sign << sign_offset) | 1);
      }
    }

    // Properly encode the exponent for a subnormal output.
    exponent = 0;
  } else {
    // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
    // normal values.
    mantissa &= ~(UINT64_C(1) << highest_significant_bit);
  }

  // The casts below are only well-defined for unsigned integers.
  VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
  VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);

  if (shift > 0) {
    if (round_mode == FPTieEven) {
      // We have to shift the mantissa to the right. Some precision is lost, so
      // we need to apply rounding.
      uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
      uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
      uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
      uint64_t adjusted = mantissa - adjustment;
      T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;

      T result =
          static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
                         ((mantissa >> shift) << mantissa_offset));

      // A very large mantissa can overflow during rounding. If this happens,
      // the exponent should be incremented and the mantissa set to 1.0
      // (encoded as 0). Applying halfbit_adjusted after assembling the float
      // has the nice side-effect that this case is handled for free.
      //
      // This also handles cases where a very large finite value overflows to
      // infinity, or where a very large subnormal value overflows to become
      // normal.
      return result + halfbit_adjusted;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // If any bits at position halfbit or below are set, onebit (ie. the
      // bottom bit of the resulting mantissa) must be set.
      uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
      if (fractional_bits != 0) {
        mantissa |= UINT64_C(1) << shift;
      }

      return static_cast<T>((sign << sign_offset) |
                            (exponent << exponent_offset) |
                            ((mantissa >> shift) << mantissa_offset));
    }
  } else {
    // We have to shift the mantissa to the left (or not at all). The input
    // mantissa is exactly representable in the output mantissa, so apply no
    // rounding correction.
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          ((mantissa << -shift) << mantissa_offset));
  }
}


// See FPRound for a description of this function.
inline double FPRoundToDouble(int64_t sign,
                              int64_t exponent,
                              uint64_t mantissa,
                              FPRounding round_mode) {
  uint64_t bits =
      FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
                                                                  exponent,
                                                                  mantissa,
                                                                  round_mode);
  return RawbitsToDouble(bits);
}


// See FPRound for a description of this function.
inline Float16 FPRoundToFloat16(int64_t sign,
                                int64_t exponent,
                                uint64_t mantissa,
                                FPRounding round_mode) {
  return RawbitsToFloat16(
      FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
          sign, exponent, mantissa, round_mode));
}
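// Added illustration (not part of the original header): packing sign == 0,
// unbiased exponent == 0 and a mantissa whose top bit represents
// pow(2, exponent) yields exactly 1.0.
//
//   FPRoundToDouble(0, 0, UINT64_C(1) << 63, FPTieEven);   // 1.0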

// See FPRound for a description of this function.
static inline float FPRoundToFloat(int64_t sign,
                                   int64_t exponent,
                                   uint64_t mantissa,
                                   FPRounding round_mode) {
  uint32_t bits =
      FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
                                                                exponent,
                                                                mantissa,
                                                                round_mode);
  return RawbitsToFloat(bits);
}


float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
float FPToFloat(double value,
                FPRounding round_mode,
                UseDefaultNaN DN,
                bool* exception = NULL);

double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);

Float16 FPToFloat16(float value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

// Like static_cast<T>(value), but with specialisations for the Float16 type.
template <typename T, typename F>
T StaticCastFPTo(F value) {
  return static_cast<T>(value);
}

template <>
inline float StaticCastFPTo<float, Float16>(Float16 value) {
  return FPToFloat(value, kIgnoreDefaultNaN);
}

template <>
inline double StaticCastFPTo<double, Float16>(Float16 value) {
  return FPToDouble(value, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, float>(float value) {
  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <>
inline Float16 StaticCastFPTo<Float16, double>(double value) {
  return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
}

template <typename T>
uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
  switch (size_in_bits) {
    case 16:
      return Float16ToRawbits(StaticCastFPTo<Float16>(value));
    case 32:
      return FloatToRawbits(StaticCastFPTo<float>(value));
    case 64:
      return DoubleToRawbits(StaticCastFPTo<double>(value));
  }
  VIXL_UNREACHABLE();
  return 0;
}

template <typename T>
T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
  VIXL_ASSERT(IsUintN(size_in_bits, value));
  switch (size_in_bits) {
    case 16:
      return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
    case 32:
      return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
    case 64:
      return StaticCastFPTo<T>(RawbitsToDouble(value));
  }
  VIXL_UNREACHABLE();
  return 0;
}

// Jenkins one-at-a-time hash, based on
// https://en.wikipedia.org/wiki/Jenkins_hash_function citing
// https://www.drdobbs.com/database/algorithm-alley/184410284.
constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
  if (*str == '\0') {
    hash += hash << 3;
    hash ^= hash >> 11;
    hash += hash << 15;
    return hash;
  } else {
    hash += *str;
    hash += hash << 10;
    hash ^= hash >> 6;
    return Hash(str + 1, hash);
  }
}

constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }

}  // namespace vixl

#endif  // VIXL_UTILS_H