1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_UTILS_H
28 #define VIXL_UTILS_H
29 
30 #include <cmath>
#include <cstdlib>
31 #include <cstring>
32 #include <limits>
33 #include <type_traits>
34 #include <vector>
35 #include <optional>
36 
37 #include "compiler-intrinsics-vixl.h"
38 #include "globals-vixl.h"
39 
40 #if defined(VIXL_USE_PANDA_ALLOC) && !defined(PANDA_BUILD)
41 #error "PANDA_BUILD should be defined for VIXL_USE_PANDA_ALLOC"
42 #endif
43 
44 #ifdef VIXL_USE_PANDA_ALLOC
45 #include "mem/arena_allocator_stl_adapter.h"
46 #include "mem/arena_allocator.h"
47 #include "utils/arena_containers.h"
48 #else
49 #include <list>
50 #include <map>
51 #include <memory>
52 #include <string>
53 #include <unordered_map>
54 #include <unordered_set>
55 #include <vector>
56 #endif
57 
58 #if defined(PANDA_BUILD) && !defined(VIXL_USE_PANDA_ALLOC)
59 namespace ark {
60   template <bool> class ArenaAllocatorT;
61   using ArenaAllocator = ArenaAllocatorT<false>;
62 }
63 #endif
64 
65 namespace vixl {
66 #ifdef VIXL_USE_PANDA_ALLOC
67 template <typename T>
68 using List = ark::ArenaList<T>;
69 
70 template <typename K, typename V>
71 using Map = ark::ArenaMap<K, V>;
72 
73 template <typename K, typename V>
74 using UnorderedMap = ark::ArenaUnorderedMap<K, V>;
75 
76 template <typename K>
77 using UnorderedSet = ark::ArenaUnorderedSet<K>;
78 
79 using String = ark::ArenaString;
80 
81 template <typename T>
82 using Vector = ark::ArenaVector<T>;
83 #else
84 template <typename T>
85 using List = std::list<T>;
86 
87 template <typename K, typename V>
88 using Map = std::map<K, V>;
89 
90 template <typename K, typename V>
91 using UnorderedMap = std::unordered_map<K, V>;
92 
93 template <typename K>
94 using UnorderedSet = std::unordered_set<K>;
95 
96 using String = std::string;
97 
98 template <typename T>
99 using Vector = std::vector<T>;
100 #endif
101 
102 #ifdef PANDA_BUILD
103 using PandaAllocator = ark::ArenaAllocator;
104 #endif // PANDA_BUILD
105 
106 template <typename T>
107 struct is_unbounded_array : public std::false_type {};
108 
109 template <typename T>
110 struct is_unbounded_array<T[]> : public std::true_type {};
111 
112 template <typename T>
113 constexpr bool is_unbounded_array_v = is_unbounded_array<T>::value;
114 
115 class AllocatorWrapper {
116 public:
117 #ifndef PANDA_BUILD
118   AllocatorWrapper() = default;
119 #else // PANDA_BUILD
120   AllocatorWrapper([[maybe_unused]] PandaAllocator* allocator)
121 #ifdef VIXL_USE_PANDA_ALLOC
122     : allocator_(allocator)
123 #endif
124     {}
125 #endif // PANDA_BUILD
126 
127   auto Adapter() {
128 #ifdef VIXL_USE_PANDA_ALLOC
129     return allocator_->Adapter();
130 #else
131     return std::allocator<void>();
132 #endif
133   }
134 
135   template <typename T, typename... Args>
136   [[nodiscard]] std::enable_if_t<!std::is_array_v<T>, T*> New(Args&&... args) {
137 #ifdef VIXL_USE_PANDA_ALLOC
138     return allocator_->template New<T>(std::forward<Args>(args)...);
139 #else
140     return new T(std::forward<Args>(args)...);
141 #endif
142   }
143 
144   template <typename T>
145   [[nodiscard]] std::enable_if_t<is_unbounded_array_v<T>, std::remove_extent_t<T>*> New(size_t size) {
146 #ifdef VIXL_USE_PANDA_ALLOC
147     return allocator_->template New<T>(size);
148 #else
149     return new std::remove_extent_t<T>[size];
150 #endif
151   }
152 
153   [[nodiscard]] void* Alloc(size_t size) {
154 #ifdef VIXL_USE_PANDA_ALLOC
155     return allocator_->Alloc(size);
156 #else
157     return malloc(size);
158 #endif
159   }
160 
161   template <typename T>
162   void DeleteObject([[maybe_unused]] T* obj) {
163 #ifndef VIXL_USE_PANDA_ALLOC
164     delete obj;
165 #endif
166   }
167 
168   template <typename T>
169   void DeleteArray([[maybe_unused]] T* arr) {
170 #ifndef VIXL_USE_PANDA_ALLOC
171     delete[] arr;
172 #endif
173   }
174 
175   void Free([[maybe_unused]] void* ptr) {
176 #ifndef VIXL_USE_PANDA_ALLOC
177     free(ptr);
178 #endif
179   }
180 
181 private:
182 #ifdef VIXL_USE_PANDA_ALLOC
183   PandaAllocator* allocator_;
184 #endif
185 };
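// Usage sketch (hypothetical call site, non-Panda build): a default-constructed
// AllocatorWrapper forwards to plain new/delete, so New<T>() pairs with
// DeleteObject() and New<T[]>(n) pairs with DeleteArray():
//   AllocatorWrapper allocator;
//   int* value = allocator.New<int>(42);
//   allocator.DeleteObject(value);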
186 
187 // Macros for compile-time format checking.
188 #if GCC_VERSION_OR_NEWER(4, 4, 0)
189 #define PRINTF_CHECK(format_index, varargs_index) \
190   __attribute__((format(gnu_printf, format_index, varargs_index)))
191 #else
192 #define PRINTF_CHECK(format_index, varargs_index)
193 #endif
194 
195 #ifdef __GNUC__
196 #define VIXL_HAS_DEPRECATED_WITH_MSG
197 #elif defined(__clang__)
198 #if __has_extension(attribute_deprecated_with_message)
199 #define VIXL_HAS_DEPRECATED_WITH_MSG
200 #endif
201 #endif
202 
203 #ifdef VIXL_HAS_DEPRECATED_WITH_MSG
204 #define VIXL_DEPRECATED(replaced_by, declarator) \
205   __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
206 #else
207 #define VIXL_DEPRECATED(replaced_by, declarator) declarator
208 #endif
209 
210 #ifdef VIXL_DEBUG
211 #define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
212 #else
213 #define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
214 #endif
215 
216 template <typename T, size_t n>
217 constexpr size_t ArrayLength(const T (&)[n]) {
218   return n;
219 }
220 
221 inline uint64_t GetUintMask(unsigned bits) {
222   VIXL_ASSERT(bits <= 64);
223   uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits);
224   return base - 1;
225 }
226 
227 inline uint64_t GetSignMask(unsigned bits) {
228   VIXL_ASSERT(bits <= 64);
229   return UINT64_C(1) << (bits - 1);
230 }
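// For illustration: GetUintMask(8) is 0xff and GetSignMask(8) is 0x80, while
// GetUintMask(64) is a mask of all ones.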
231 
232 // Check number width.
233 // TODO: Refactor these using templates.
234 inline bool IsIntN(unsigned n, uint32_t x) {
235   VIXL_ASSERT((0 < n) && (n <= 32));
236   return x <= static_cast<uint32_t>(INT32_MAX >> (32 - n));
237 }
238 inline bool IsIntN(unsigned n, int32_t x) {
239   VIXL_ASSERT((0 < n) && (n <= 32));
240   if (n == 32) return true;
241   int32_t limit = INT32_C(1) << (n - 1);
242   return (-limit <= x) && (x < limit);
243 }
244 inline bool IsIntN(unsigned n, uint64_t x) {
245   VIXL_ASSERT((0 < n) && (n <= 64));
246   return x <= static_cast<uint64_t>(INT64_MAX >> (64 - n));
247 }
248 inline bool IsIntN(unsigned n, int64_t x) {
249   VIXL_ASSERT((0 < n) && (n <= 64));
250   if (n == 64) return true;
251   int64_t limit = INT64_C(1) << (n - 1);
252   return (-limit <= x) && (x < limit);
253 }
254 VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
255   return IsIntN(n, x);
256 }
257 
258 inline bool IsUintN(unsigned n, uint32_t x) {
259   VIXL_ASSERT((0 < n) && (n <= 32));
260   if (n >= 32) return true;
261   return !(x >> n);
262 }
263 inline bool IsUintN(unsigned n, int32_t x) {
264   VIXL_ASSERT((0 < n) && (n < 32));
265   // Convert to an unsigned integer to avoid implementation-defined behavior.
266   return !(static_cast<uint32_t>(x) >> n);
267 }
268 inline bool IsUintN(unsigned n, uint64_t x) {
269   VIXL_ASSERT((0 < n) && (n <= 64));
270   if (n >= 64) return true;
271   return !(x >> n);
272 }
273 inline bool IsUintN(unsigned n, int64_t x) {
274   VIXL_ASSERT((0 < n) && (n < 64));
275   // Convert to an unsigned integer to avoid implementation-defined behavior.
276   return !(static_cast<uint64_t>(x) >> n);
277 }
278 VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
279   return IsUintN(n, x);
280 }
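// Worked examples: IsIntN(8, INT64_C(127)) and IsUintN(8, INT64_C(255)) are
// true, whereas IsIntN(8, INT64_C(128)) and IsUintN(8, INT64_C(256)) are false.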
281 
282 inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
283   VIXL_ASSERT((0 < n) && (n < 64));
284   return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
285 }
286 VIXL_DEPRECATED("TruncateToUintN",
287                 inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
288   return TruncateToUintN(n, x);
289 }
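// For example, TruncateToUintN(8, UINT64_C(0x1ff)) keeps only the low eight
// bits and returns 0xff.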
290 
291 // clang-format off
292 #define INT_1_TO_32_LIST(V)                                                    \
293 V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)                                 \
294 V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16)                                \
295 V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24)                                \
296 V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)
297 
298 #define INT_33_TO_63_LIST(V)                                                   \
299 V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40)                                \
300 V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48)                                \
301 V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56)                                \
302 V(57) V(58) V(59) V(60) V(61) V(62) V(63)
303 
304 #define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)
305 
306 // clang-format on
307 
308 #define DECLARE_IS_INT_N(N)                                       \
309   inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }        \
310   VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
311     return IsIntN(N, x);                                          \
312   }
313 
314 #define DECLARE_IS_UINT_N(N)                                        \
315   inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }        \
316   VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
317     return IsUintN(N, x);                                           \
318   }
319 
320 #define DECLARE_TRUNCATE_TO_UINT_32(N)                             \
321   inline uint32_t TruncateToUint##N(uint64_t x) {                  \
322     return static_cast<uint32_t>(TruncateToUintN(N, x));           \
323   }                                                                \
324   VIXL_DEPRECATED("TruncateToUint" #N,                             \
325                   inline uint32_t truncate_to_int##N(int64_t x)) { \
326     return TruncateToUint##N(x);                                   \
327   }
328 
329 INT_1_TO_63_LIST(DECLARE_IS_INT_N)
330 INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
331 INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)
332 
333 #undef DECLARE_IS_INT_N
334 #undef DECLARE_IS_UINT_N
335 #undef DECLARE_TRUNCATE_TO_UINT_32
336 
337 // Bit field extraction.
338 inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
339   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
340               (msb >= lsb));
341   if ((msb == 63) && (lsb == 0)) return x;
342   return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
343 }
344 
345 
346 inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) {
347   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
348               (msb >= lsb));
349   return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
350 }
351 
352 
353 inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) {
354   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
355               (msb >= lsb));
356   uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
357   // If the highest extracted bit is set, sign extend.
358   if ((temp >> (msb - lsb)) == 1) {
359     temp |= ~UINT64_C(0) << (msb - lsb);
360   }
361   int64_t result;
362   memcpy(&result, &temp, sizeof(result));
363   return result;
364 }
365 
366 inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) {
367   VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
368               (msb >= lsb));
369   uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
370   int32_t result;
371   memcpy(&result, &temp, sizeof(result));
372   return result;
373 }
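// For illustration: ExtractUnsignedBitfield64(7, 4, 0xab) yields 0xa, and
// ExtractSignedBitfield64(7, 0, 0x80) sign-extends bit 7 to give -128.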
374 
375 inline uint64_t RotateRight(uint64_t value,
376                             unsigned int rotate,
377                             unsigned int width) {
378   VIXL_ASSERT((width > 0) && (width <= 64));
379   uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
380   rotate &= 63;
381   if (rotate > 0) {
382     value &= width_mask;
383     value = (value << (width - rotate)) | (value >> rotate);
384   }
385   return value & width_mask;
386 }
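// Example: RotateRight(0b0001, 1, 4) rotates within the 4-bit width and
// returns 0b1000.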
387 
388 
389 // Wrapper class for passing FP16 values through the assembler.
390 // This is purely to aid with type checking/casting.
391 class Float16 {
392  public:
393   explicit Float16(double dvalue);
394   Float16() : rawbits_(0x0) {}
395   friend uint16_t Float16ToRawbits(Float16 value);
396   friend Float16 RawbitsToFloat16(uint16_t bits);
397 
398  protected:
399   uint16_t rawbits_;
400 };
401 
402 // Floating point representation.
403 uint16_t Float16ToRawbits(Float16 value);
404 
405 
406 uint32_t FloatToRawbits(float value);
407 VIXL_DEPRECATED("FloatToRawbits",
408                 inline uint32_t float_to_rawbits(float value)) {
409   return FloatToRawbits(value);
410 }
411 
412 uint64_t DoubleToRawbits(double value);
413 VIXL_DEPRECATED("DoubleToRawbits",
414                 inline uint64_t double_to_rawbits(double value)) {
415   return DoubleToRawbits(value);
416 }
417 
418 Float16 RawbitsToFloat16(uint16_t bits);
419 
420 float RawbitsToFloat(uint32_t bits);
421 VIXL_DEPRECATED("RawbitsToFloat",
422                 inline float rawbits_to_float(uint32_t bits)) {
423   return RawbitsToFloat(bits);
424 }
425 
426 double RawbitsToDouble(uint64_t bits);
427 VIXL_DEPRECATED("RawbitsToDouble",
428                 inline double rawbits_to_double(uint64_t bits)) {
429   return RawbitsToDouble(bits);
430 }
431 
432 // Some compilers dislike negating unsigned integers,
433 // so we provide an equivalent.
434 template <typename T>
435 T UnsignedNegate(T value) {
436   VIXL_STATIC_ASSERT(std::is_unsigned<T>::value);
437   return ~value + 1;
438 }
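// For instance, UnsignedNegate(UINT32_C(1)) returns 0xffffffff.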
439 
440 // An absolute operation for signed integers that is defined for results outside
441 // the representable range. Specifically, Abs(MIN_INT) is MIN_INT.
442 template <typename T>
443 T Abs(T val) {
444   // TODO: this static assertion is for signed integer inputs, as that's the
445   // only type tested. However, the code should work for all numeric inputs.
446   // Remove the assertion and this comment when more tests are available.
447   VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
448   return ((val >= -std::numeric_limits<T>::max()) && (val < 0)) ? -val : val;
449 }
450 
451 // Convert unsigned to signed numbers in a well-defined way (using two's
452 // complement representations).
453 inline int64_t RawbitsToInt64(uint64_t bits) {
454   return (bits >= UINT64_C(0x8000000000000000))
455              ? (-static_cast<int64_t>(UnsignedNegate(bits) - 1) - 1)
456              : static_cast<int64_t>(bits);
457 }
458 
459 inline int32_t RawbitsToInt32(uint32_t bits) {
460   return (bits >= UINT64_C(0x80000000))
461              ? (-static_cast<int32_t>(UnsignedNegate(bits) - 1) - 1)
462              : static_cast<int32_t>(bits);
463 }
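// Examples: RawbitsToInt32(0xffffffff) is -1, and
// RawbitsToInt64(UINT64_C(0x8000000000000000)) is INT64_MIN.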
464 
465 namespace internal {
466 
467 // Internal simulation class used solely by the simulator to
468 // provide an abstraction layer for any half-precision arithmetic.
469 class SimFloat16 : public Float16 {
470  public:
471   // TODO: We should investigate making this constructor explicit.
472   // This is currently difficult to do due to a number of templated
473   // functions in the simulator which rely on returning double values.
474   SimFloat16(double dvalue) : Float16(dvalue) {}  // NOLINT(runtime/explicit)
475   SimFloat16(Float16 f) {                         // NOLINT(runtime/explicit)
476     this->rawbits_ = Float16ToRawbits(f);
477   }
478   SimFloat16() : Float16() {}
479   SimFloat16 operator-() const;
480   SimFloat16 operator+(SimFloat16 rhs) const;
481   SimFloat16 operator-(SimFloat16 rhs) const;
482   SimFloat16 operator*(SimFloat16 rhs) const;
483   SimFloat16 operator/(SimFloat16 rhs) const;
484   bool operator<(SimFloat16 rhs) const;
485   bool operator>(SimFloat16 rhs) const;
486   bool operator==(SimFloat16 rhs) const;
487   bool operator!=(SimFloat16 rhs) const;
488   // This is necessary for conversions performed in (macro asm) Fmov.
489   bool operator==(double rhs) const;
490   operator double() const;
491 };
492 }  // namespace internal
493 
494 uint32_t Float16Sign(internal::SimFloat16 value);
495 
496 uint32_t Float16Exp(internal::SimFloat16 value);
497 
498 uint32_t Float16Mantissa(internal::SimFloat16 value);
499 
500 uint32_t FloatSign(float value);
501 VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
502   return FloatSign(value);
503 }
504 
505 uint32_t FloatExp(float value);
506 VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
507   return FloatExp(value);
508 }
509 
510 uint32_t FloatMantissa(float value);
511 VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
512   return FloatMantissa(value);
513 }
514 
515 uint32_t DoubleSign(double value);
516 VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
517   return DoubleSign(value);
518 }
519 
520 uint32_t DoubleExp(double value);
521 VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
522   return DoubleExp(value);
523 }
524 
525 uint64_t DoubleMantissa(double value);
526 VIXL_DEPRECATED("DoubleMantissa",
527                 inline uint64_t double_mantissa(double value)) {
528   return DoubleMantissa(value);
529 }
530 
531 internal::SimFloat16 Float16Pack(uint16_t sign,
532                                  uint16_t exp,
533                                  uint16_t mantissa);
534 
535 float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
536 VIXL_DEPRECATED("FloatPack",
537                 inline float float_pack(uint32_t sign,
538                                         uint32_t exp,
539                                         uint32_t mantissa)) {
540   return FloatPack(sign, exp, mantissa);
541 }
542 
543 double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
544 VIXL_DEPRECATED("DoublePack",
545                 inline double double_pack(uint32_t sign,
546                                           uint32_t exp,
547                                           uint64_t mantissa)) {
548   return DoublePack(sign, exp, mantissa);
549 }
550 
551 // An fpclassify() function for 16-bit half-precision floats.
552 int Float16Classify(Float16 value);
553 VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
554   return Float16Classify(RawbitsToFloat16(value));
555 }
556 
557 bool IsZero(Float16 value);
558 
559 inline bool IsPositiveZero(double value) {
560   return (value == 0.0) && (copysign(1.0, value) > 0.0);
561 }
562 
563 inline bool IsNaN(float value) { return std::isnan(value); }
564 
565 inline bool IsNaN(double value) { return std::isnan(value); }
566 
567 inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }
568 
569 inline bool IsInf(float value) { return std::isinf(value); }
570 
571 inline bool IsInf(double value) { return std::isinf(value); }
572 
573 inline bool IsInf(Float16 value) {
574   return Float16Classify(value) == FP_INFINITE;
575 }
576 
577 
578 // NaN tests.
579 inline bool IsSignallingNaN(double num) {
580   const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
581   uint64_t raw = DoubleToRawbits(num);
582   if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
583     return true;
584   }
585   return false;
586 }
587 
588 
589 inline bool IsSignallingNaN(float num) {
590   const uint32_t kFP32QuietNaNMask = 0x00400000;
591   uint32_t raw = FloatToRawbits(num);
592   if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
593     return true;
594   }
595   return false;
596 }
597 
598 
599 inline bool IsSignallingNaN(Float16 num) {
600   const uint16_t kFP16QuietNaNMask = 0x0200;
601   return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
602 }
603 
604 
605 template <typename T>
606 inline bool IsQuietNaN(T num) {
607   return IsNaN(num) && !IsSignallingNaN(num);
608 }
609 
610 
611 // Convert the NaN in 'num' to a quiet NaN.
612 inline double ToQuietNaN(double num) {
613   const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
614   VIXL_ASSERT(IsNaN(num));
615   return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
616 }
617 
618 
619 inline float ToQuietNaN(float num) {
620   const uint32_t kFP32QuietNaNMask = 0x00400000;
621   VIXL_ASSERT(IsNaN(num));
622   return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
623 }
624 
625 
626 inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
627   const uint16_t kFP16QuietNaNMask = 0x0200;
628   VIXL_ASSERT(IsNaN(num));
629   return internal::SimFloat16(
630       RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
631 }
632 
633 
634 // Fused multiply-add.
635 inline double FusedMultiplyAdd(double op1, double op2, double a) {
636   return fma(op1, op2, a);
637 }
638 
639 
640 inline float FusedMultiplyAdd(float op1, float op2, float a) {
641   return fmaf(op1, op2, a);
642 }
643 
644 
645 inline uint64_t LowestSetBit(uint64_t value) {
646   return value & UnsignedNegate(value);
647 }
648 
649 
650 template <typename T>
651 inline int HighestSetBitPosition(T value) {
652   VIXL_ASSERT(value != 0);
653   return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
654 }
655 
656 
657 template <typename V>
658 inline int WhichPowerOf2(V value) {
659   VIXL_ASSERT(IsPowerOf2(value));
660   return CountTrailingZeros(value);
661 }
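// For illustration: LowestSetBit(UINT64_C(12)) is 4,
// HighestSetBitPosition(UINT64_C(12)) is 3 and WhichPowerOf2(8) is 3.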
662 
663 
664 unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);
665 
666 
667 int BitCount(uint64_t value);
668 
669 
670 template <typename T>
671 T ReverseBits(T value) {
672   VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
673               (sizeof(value) == 4) || (sizeof(value) == 8));
674   T result = 0;
675   for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
676     result = (result << 1) | (value & 1);
677     value >>= 1;
678   }
679   return result;
680 }
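// Example: ReverseBits(uint8_t{0x06}) mirrors all eight bits and returns 0x60.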
681 
682 
683 template <typename T>
684 inline T SignExtend(T val, int size_in_bits) {
685   VIXL_ASSERT(size_in_bits > 0);
686   T mask = (T(2) << (size_in_bits - 1)) - T(1);
687   val &= mask;
688   T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits);
689   val |= sign_bits;
690   return val;
691 }
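// Example: SignExtend(int32_t{0xff}, 8) is -1, while SignExtend(int32_t{0x7f}, 8)
// leaves the value unchanged at 0x7f.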
692 
693 
694 template <typename T>
695 T ReverseBytes(T value, int block_bytes_log2) {
696   VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
697   VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
698   // Split the 64-bit value into an array of eight bytes, where bytes[0] is the
699   // least significant byte and bytes[7] is the most significant.
700   uint8_t bytes[8];
701   uint64_t mask = UINT64_C(0xff00000000000000);
702   for (int i = 7; i >= 0; i--) {
703     bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
704     mask >>= 8;
705   }
706 
707   // Permutation tables for REV instructions.
708   //  permute_table[0] is used by REV16_x, REV16_w
709   //  permute_table[1] is used by REV32_x, REV_w
710   //  permute_table[2] is used by REV_x
711   VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
712   static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
713                                               {4, 5, 6, 7, 0, 1, 2, 3},
714                                               {0, 1, 2, 3, 4, 5, 6, 7}};
715   uint64_t temp = 0;
716   for (int i = 0; i < 8; i++) {
717     temp <<= 8;
718     temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
719   }
720 
721   T result;
722   VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
723   memcpy(&result, &temp, sizeof(result));
724   return result;
725 }
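// For example, ReverseBytes(UINT32_C(0x12345678), 2) reverses whole 4-byte
// blocks and returns 0x78563412, while block_bytes_log2 == 1 swaps the bytes
// within each halfword and returns 0x34127856.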
726 
727 template <unsigned MULTIPLE, typename T>
728 inline bool IsMultiple(T value) {
729   VIXL_ASSERT(IsPowerOf2(MULTIPLE));
730   return (value & (MULTIPLE - 1)) == 0;
731 }
732 
733 template <typename T>
734 inline bool IsMultiple(T value, unsigned multiple) {
735   VIXL_ASSERT(IsPowerOf2(multiple));
736   return (value & (multiple - 1)) == 0;
737 }
738 
739 template <typename T>
740 inline bool IsAligned(T pointer, int alignment) {
741   VIXL_ASSERT(IsPowerOf2(alignment));
742   return (pointer & (alignment - 1)) == 0;
743 }
744 
745 // Pointer alignment
746 // TODO: rename/refactor to make it specific to instructions.
747 template <unsigned ALIGN, typename T>
748 inline bool IsAligned(T pointer) {
749   VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));  // NOLINT(runtime/sizeof)
750   // Use C-style casts to get static_cast behaviour for integral types (T), and
751   // reinterpret_cast behaviour for other types.
752   return IsAligned((intptr_t)(pointer), ALIGN);
753 }
754 
755 template <typename T>
756 bool IsWordAligned(T pointer) {
757   return IsAligned<4>(pointer);
758 }
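// For illustration: IsMultiple<8>(24) and IsMultiple(24, 8) are true, and
// IsAligned<4>(pointer) checks that the low two bits of the pointer are clear.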
759 
760 // Increment a pointer until it has the specified alignment. The alignment must
761 // be a power of two.
762 template <class T>
763 T AlignUp(T pointer,
764           typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
765   VIXL_ASSERT(IsPowerOf2(alignment));
766   // Use C-style casts to get static_cast behaviour for integral types (T), and
767   // reinterpret_cast behaviour for other types.
768 
769   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
770       (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
771   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
772 
773   size_t mask = alignment - 1;
774   T result = (T)((pointer_raw + mask) & ~mask);
775   VIXL_ASSERT(result >= pointer);
776 
777   return result;
778 }
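// Example: AlignUp(UINT64_C(13), 8) returns 16; an already-aligned value is
// returned unchanged.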
779 
780 // Decrement a pointer until it has the specified alignment. The alignment must
781 // be a power of two.
782 template <class T>
783 T AlignDown(T pointer,
784             typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
785   VIXL_ASSERT(IsPowerOf2(alignment));
786   // Use C-style casts to get static_cast behaviour for integral types (T), and
787   // reinterpret_cast behaviour for other types.
788 
789   typename Unsigned<sizeof(T)* kBitsPerByte>::type pointer_raw =
790       (typename Unsigned<sizeof(T) * kBitsPerByte>::type) pointer;
791   VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));
792 
793   size_t mask = alignment - 1;
794   return (T)(pointer_raw & ~mask);
795 }
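// Example: AlignDown(UINT64_C(13), 8) returns 8.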
796 
797 
798 template <typename T>
799 inline T ExtractBit(T value, unsigned bit) {
800   return (value >> bit) & T(1);
801 }
802 
803 template <typename Ts, typename Td>
804 inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
805   return Td((value >> least_significant_bit) & Ts(mask));
806 }
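// Examples: ExtractBit(UINT32_C(0xa), 1) is 1, and
// ExtractBits(UINT32_C(0xabcd), 4, UINT32_C(0xff)) is 0xbc.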
807 
808 template <typename Ts, typename Td>
809 inline void AssignBit(Td& dst,  // NOLINT(runtime/references)
810                       int bit,
811                       Ts value) {
812   VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
813   VIXL_ASSERT(bit >= 0);
814   VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
815   Td mask(1);
816   dst &= ~(mask << bit);
817   dst |= Td(value) << bit;
818 }
819 
820 template <typename Td, typename Ts>
821 inline void AssignBits(Td& dst,  // NOLINT(runtime/references)
822                        int least_significant_bit,
823                        Ts mask,
824                        Ts value) {
825   VIXL_ASSERT(least_significant_bit >= 0);
826   VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
827   VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
828               Td(mask));
829   VIXL_ASSERT((value & mask) == value);
830   dst &= ~(Td(mask) << least_significant_bit);
831   dst |= Td(value) << least_significant_bit;
832 }
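// For example, starting from uint32_t w = 0,
// AssignBits(w, 8, UINT32_C(0xff), UINT32_C(0xab)) leaves w equal to 0xab00.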
833 
834 class VFP {
835  public:
836   static uint32_t FP32ToImm8(float imm) {
837     // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
838     uint32_t bits = FloatToRawbits(imm);
839     // bit7: a000.0000
840     uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
841     // bit6: 0b00.0000
842     uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
843     // bit5_to_0: 00cd.efgh
844     uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
845     return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
846   }
847   static uint32_t FP64ToImm8(double imm) {
848     // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
849     //       0000.0000.0000.0000.0000.0000.0000.0000
850     uint64_t bits = DoubleToRawbits(imm);
851     // bit7: a000.0000
852     uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
853     // bit6: 0b00.0000
854     uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
855     // bit5_to_0: 00cd.efgh
856     uint64_t bit5_to_0 = (bits >> 48) & 0x3f;
857 
858     return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
859   }
860   static float Imm8ToFP32(uint32_t imm8) {
861     //   Imm8: abcdefgh (8 bits)
862     // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
863     // where B is b ^ 1
864     uint32_t bits = imm8;
865     uint32_t bit7 = (bits >> 7) & 0x1;
866     uint32_t bit6 = (bits >> 6) & 0x1;
867     uint32_t bit5_to_0 = bits & 0x3f;
868     uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);
869 
870     return RawbitsToFloat(result);
871   }
872   static double Imm8ToFP64(uint32_t imm8) {
873     //   Imm8: abcdefgh (8 bits)
874     // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
875     //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
876     // where B is b ^ 1
877     uint32_t bits = imm8;
878     uint64_t bit7 = (bits >> 7) & 0x1;
879     uint64_t bit6 = (bits >> 6) & 0x1;
880     uint64_t bit5_to_0 = bits & 0x3f;
881     uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
882     return RawbitsToDouble(result);
883   }
884   static bool IsImmFP32(float imm) {
885     // Valid values will have the form:
886     // aBbb.bbbc.defg.h000.0000.0000.0000.0000
887     uint32_t bits = FloatToRawbits(imm);
888     // bits[19..0] are cleared.
889     if ((bits & 0x7ffff) != 0) {
890       return false;
891     }
892 
893 
894     // bits[29..25] are all set or all cleared.
895     uint32_t b_pattern = (bits >> 16) & 0x3e00;
896     if (b_pattern != 0 && b_pattern != 0x3e00) {
897       return false;
898     }
899     // bit[30] and bit[29] are opposite.
900     if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
901       return false;
902     }
903     return true;
904   }
905   static bool IsImmFP64(double imm) {
906     // Valid values will have the form:
907     // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
908     // 0000.0000.0000.0000.0000.0000.0000.0000
909     uint64_t bits = DoubleToRawbits(imm);
910     // bits[47..0] are cleared.
911     if ((bits & 0x0000ffffffffffff) != 0) {
912       return false;
913     }
914     // bits[61..54] are all set or all cleared.
915     uint32_t b_pattern = (bits >> 48) & 0x3fc0;
916     if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
917       return false;
918     }
919     // bit[62] and bit[61] are opposite.
920     if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
921       return false;
922     }
923     return true;
924   }
925 };
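// For illustration: 0.5f is an encodable immediate, so VFP::IsImmFP32(0.5f) is
// true, VFP::FP32ToImm8(0.5f) is 0x60, and VFP::Imm8ToFP32(0x60) gives back 0.5f.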
926 
927 class BitField {
928   // ForEachBitHelper is a functor that must provide
929   //   bool ForEachBitHelper::execute(ElementType id) const
930   // which returns whether the iteration should continue (true) or stop
931   // (false).
932   // check_set selects whether set (true) or clear (false) bits are visited.
933   template <typename ForEachBitHelper, bool check_set>
934   bool ForEachBit(const ForEachBitHelper& helper) {
935     for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
936       if (bitfield_[i] == check_set)
937         if (!helper.execute(i)) return false;
938     }
939     return true;
940   }
941 
942  public:
943 #ifndef PANDA_BUILD
944   explicit BitField(unsigned size) : bitfield_(size, 0) {}
945 #else
946   explicit BitField(unsigned size) = delete;
947   explicit BitField(PandaAllocator* allocator, unsigned size) : bitfield_(size, 0, AllocatorWrapper(allocator).Adapter()) {}
948 #endif
949 
950   void Set(int i) {
951     VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
952     bitfield_[i] = true;
953   }
954 
955   void Unset(int i) {
956     VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
957     bitfield_[i] = false;
958   }
959 
960   bool IsSet(int i) const { return bitfield_[i]; }
961 
962   // For each bit not set in the bitfield, call the helper's execute
963   // method.
964   // ForEachBitNotSetHelper::execute returns true if the iteration through
965   // the bits can continue, otherwise it will stop.
966   // struct ForEachBitSetHelper {
967   //   bool execute(int /*id*/) { return false; }
968   // };
969   template <typename ForEachBitNotSetHelper>
970   bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
971     return ForEachBit<ForEachBitNotSetHelper, false>(helper);
972   }
973 
974   // For each bit set in the bitfield, call the helper's execute
975   // method.
976   template <typename ForEachBitSetHelper>
977   bool ForEachBitSet(const ForEachBitSetHelper& helper) {
978     return ForEachBit<ForEachBitSetHelper, true>(helper);
979   }
980 
981  private:
982 #ifndef PANDA_BUILD
983   std::vector<bool> bitfield_;
984 #else
985   Vector<bool> bitfield_;
986 #endif
987 };
988 
989 namespace internal {
990 
991 typedef int64_t Int64;
992 class Uint64;
993 class Uint128;
994 
995 class Uint32 {
996   uint32_t data_;
997 
998  public:
999   // Unlike a plain uint32_t, a default-constructed Uint32 is zero-initialised.
1000   Uint32() { data_ = 0; }
1001   explicit Uint32(uint32_t data) : data_(data) {}
1002   inline explicit Uint32(Uint64 data);
1003   uint32_t Get() const { return data_; }
1004   template <int N>
1005   int32_t GetSigned() const {
1006     return ExtractSignedBitfield32(N - 1, 0, data_);
1007   }
1008   int32_t GetSigned() const { return data_; }
1009   Uint32 operator~() const { return Uint32(~data_); }
1010   Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); }
1011   bool operator==(Uint32 value) const { return data_ == value.data_; }
1012   bool operator!=(Uint32 value) const { return data_ != value.data_; }
1013   bool operator>(Uint32 value) const { return data_ > value.data_; }
1014   Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
1015   Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
1016   Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
1017   Uint32 operator&=(Uint32 value) {
1018     data_ &= value.data_;
1019     return *this;
1020   }
1021   Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
1022   Uint32 operator^=(Uint32 value) {
1023     data_ ^= value.data_;
1024     return *this;
1025   }
1026   Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
1027   Uint32 operator|=(Uint32 value) {
1028     data_ |= value.data_;
1029     return *this;
1030   }
1031   // Unlike uint32_t, the shift operators accept a negative shift amount and
1032   // return 0 when the shift amount is 32 or more.
1033   Uint32 operator>>(int shift) const {
1034     if (shift == 0) return *this;
1035     if (shift < 0) {
1036       int tmp = -shift;
1037       if (tmp >= 32) return Uint32(0);
1038       return Uint32(data_ << tmp);
1039     }
1040     int tmp = shift;
1041     if (tmp >= 32) return Uint32(0);
1042     return Uint32(data_ >> tmp);
1043   }
1044   Uint32 operator<<(int shift) const {
1045     if (shift == 0) return *this;
1046     if (shift < 0) {
1047       int tmp = -shift;
1048       if (tmp >= 32) return Uint32(0);
1049       return Uint32(data_ >> tmp);
1050     }
1051     int tmp = shift;
1052     if (tmp >= 32) return Uint32(0);
1053     return Uint32(data_ << tmp);
1054   }
1055 };
1056 
1057 class Uint64 {
1058   uint64_t data_;
1059 
1060  public:
1061   // Unlike a plain uint64_t, a default-constructed Uint64 is zero-initialised.
1062   Uint64() { data_ = 0; }
1063   explicit Uint64(uint64_t data) : data_(data) {}
1064   explicit Uint64(Uint32 data) : data_(data.Get()) {}
1065   inline explicit Uint64(Uint128 data);
1066   uint64_t Get() const { return data_; }
1067   int64_t GetSigned(int N) const {
1068     return ExtractSignedBitfield64(N - 1, 0, data_);
1069   }
1070   int64_t GetSigned() const { return data_; }
1071   Uint32 ToUint32() const {
1072     VIXL_ASSERT((data_ >> 32) == 0);
1073     return Uint32(static_cast<uint32_t>(data_));
1074   }
1075   Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
1076   Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
1077   Uint64 operator~() const { return Uint64(~data_); }
1078   Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); }
1079   bool operator==(Uint64 value) const { return data_ == value.data_; }
1080   bool operator!=(Uint64 value) const { return data_ != value.data_; }
1081   Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
1082   Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
1083   Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
1084   Uint64 operator&=(Uint64 value) {
1085     data_ &= value.data_;
1086     return *this;
1087   }
1088   Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
1089   Uint64 operator^=(Uint64 value) {
1090     data_ ^= value.data_;
1091     return *this;
1092   }
1093   Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
1094   Uint64 operator|=(Uint64 value) {
1095     data_ |= value.data_;
1096     return *this;
1097   }
1098   // Unlike uint64_t, the shift operators accept a negative shift amount and
1099   // return 0 when the shift amount is 64 or more.
1100   Uint64 operator>>(int shift) const {
1101     if (shift == 0) return *this;
1102     if (shift < 0) {
1103       int tmp = -shift;
1104       if (tmp >= 64) return Uint64(0);
1105       return Uint64(data_ << tmp);
1106     }
1107     int tmp = shift;
1108     if (tmp >= 64) return Uint64(0);
1109     return Uint64(data_ >> tmp);
1110   }
1111   Uint64 operator<<(int shift) const {
1112     if (shift == 0) return *this;
1113     if (shift < 0) {
1114       int tmp = -shift;
1115       if (tmp >= 64) return Uint64(0);
1116       return Uint64(data_ >> tmp);
1117     }
1118     int tmp = shift;
1119     if (tmp >= 64) return Uint64(0);
1120     return Uint64(data_ << tmp);
1121   }
1122 };
1123 
1124 class Uint128 {
1125   uint64_t data_high_;
1126   uint64_t data_low_;
1127 
1128  public:
1129   Uint128() : data_high_(0), data_low_(0) {}
1130   explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
1131   explicit Uint128(Uint64 data_low)
1132       : data_high_(0), data_low_(data_low.Get()) {}
1133   Uint128(uint64_t data_high, uint64_t data_low)
1134       : data_high_(data_high), data_low_(data_low) {}
1135   Uint64 ToUint64() const {
1136     VIXL_ASSERT(data_high_ == 0);
1137     return Uint64(data_low_);
1138   }
1139   Uint64 GetHigh64() const { return Uint64(data_high_); }
1140   Uint64 GetLow64() const { return Uint64(data_low_); }
1141   Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
1142   bool operator==(Uint128 value) const {
1143     return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
1144   }
1145   Uint128 operator&(Uint128 value) const {
1146     return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
1147   }
1148   Uint128 operator&=(Uint128 value) {
1149     data_high_ &= value.data_high_;
1150     data_low_ &= value.data_low_;
1151     return *this;
1152   }
1153   Uint128 operator|=(Uint128 value) {
1154     data_high_ |= value.data_high_;
1155     data_low_ |= value.data_low_;
1156     return *this;
1157   }
1158   Uint128 operator>>(int shift) const {
1159     VIXL_ASSERT((shift >= 0) && (shift < 128));
1160     if (shift == 0) return *this;
1161     if (shift >= 64) {
1162       return Uint128(0, data_high_ >> (shift - 64));
1163     }
1164     uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
1165     return Uint128(data_high_ >> shift, tmp);
1166   }
1167   Uint128 operator<<(int shift) const {
1168     VIXL_ASSERT((shift >= 0) && (shift < 128));
1169     if (shift == 0) return *this;
1170     if (shift >= 64) {
1171       return Uint128(data_low_ << (shift - 64), 0);
1172     }
1173     uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
1174     return Uint128(tmp, data_low_ << shift);
1175   }
1176 };
1177 
1178 Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
1179 Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}
1180 
1181 Int64 BitCount(Uint32 value);
1182 
1183 // The algorithm used is adapted from the one described in section 8.2 of
1184 // Hacker's Delight, by Henry S. Warren, Jr.
1185 template <unsigned N, typename T>
1186 int64_t MultiplyHigh(T u, T v) {
1187   uint64_t u0, v0, w0, u1, v1, w1, w2, t;
1188   VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64));
1189   uint64_t sign_mask = UINT64_C(1) << (N - 1);
1190   uint64_t sign_ext = 0;
1191   unsigned half_bits = N / 2;
1192   uint64_t half_mask = GetUintMask(half_bits);
1193   if (std::numeric_limits<T>::is_signed) {
1194     sign_ext = UINT64_C(0xffffffffffffffff) << half_bits;
1195   }
1196 
1197   VIXL_ASSERT(sizeof(u) == sizeof(uint64_t));
1198   VIXL_ASSERT(sizeof(u) == sizeof(u0));
1199 
1200   u0 = u & half_mask;
1201   u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0);
1202   v0 = v & half_mask;
1203   v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0);
1204 
1205   w0 = u0 * v0;
1206   t = u1 * v0 + (w0 >> half_bits);
1207 
1208   w1 = t & half_mask;
1209   w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0);
1210   w1 = u0 * v1 + w1;
1211   w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0);
1212 
1213   uint64_t value = u1 * v1 + w2 + w1;
1214   int64_t result;
1215   memcpy(&result, &value, sizeof(result));
1216   return result;
1217 }
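// For illustration: MultiplyHigh<64>(UINT64_C(1) << 32, UINT64_C(1) << 32)
// returns 1, the high 64 bits of the 128-bit product 2^64.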
1218 
1219 }  // namespace internal
1220 
1221 // The default NaN values (for FPCR.DN=1).
1222 extern const double kFP64DefaultNaN;
1223 extern const float kFP32DefaultNaN;
1224 extern const Float16 kFP16DefaultNaN;
1225 
1226 // Floating-point infinity values.
1227 extern const Float16 kFP16PositiveInfinity;
1228 extern const Float16 kFP16NegativeInfinity;
1229 extern const float kFP32PositiveInfinity;
1230 extern const float kFP32NegativeInfinity;
1231 extern const double kFP64PositiveInfinity;
1232 extern const double kFP64NegativeInfinity;
1233 
1234 // Floating-point zero values.
1235 extern const Float16 kFP16PositiveZero;
1236 extern const Float16 kFP16NegativeZero;
1237 
1238 // AArch64 floating-point specifics. These match IEEE-754.
1239 const unsigned kDoubleMantissaBits = 52;
1240 const unsigned kDoubleExponentBits = 11;
1241 const unsigned kFloatMantissaBits = 23;
1242 const unsigned kFloatExponentBits = 8;
1243 const unsigned kFloat16MantissaBits = 10;
1244 const unsigned kFloat16ExponentBits = 5;
1245 
1246 enum FPRounding {
1247   // The first four values are encodable directly by FPCR<RMode>.
1248   FPTieEven = 0x0,
1249   FPPositiveInfinity = 0x1,
1250   FPNegativeInfinity = 0x2,
1251   FPZero = 0x3,
1252 
1253   // The final rounding modes are only available when explicitly specified by
1254   // the instruction (such as with fcvta). They cannot be set in FPCR.
1255   FPTieAway,
1256   FPRoundOdd
1257 };
1258 
1259 enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };
1260 
1261 // Assemble the specified IEEE-754 components into the target type and apply
1262 // appropriate rounding.
1263 //  sign:     0 = positive, 1 = negative
1264 //  exponent: Unbiased IEEE-754 exponent.
1265 //  mantissa: The mantissa of the input. The top bit (which is not encoded for
1266 //            normal IEEE-754 values) must not be omitted. This bit has the
1267 //            value 'pow(2, exponent)'.
1268 //
1269 // The input value is assumed to be a normalized value. That is, the input may
1270 // not be infinity or NaN. If the source value is subnormal, it must be
1271 // normalized before calling this function such that the highest set bit in the
1272 // mantissa has the value 'pow(2, exponent)'.
1273 //
1274 // Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
1275 // calling a templated FPRound.
1276 template <class T, int ebits, int mbits>
1277 T FPRound(int64_t sign,
1278           int64_t exponent,
1279           uint64_t mantissa,
1280           FPRounding round_mode) {
1281   VIXL_ASSERT((sign == 0) || (sign == 1));
1282 
1283   // Only FPTieEven and FPRoundOdd rounding modes are implemented.
1284   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
1285 
1286   // Rounding can promote subnormals to normals, and normals to infinities. For
1287   // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
1288   // encodable as a float, but rounding based on the low-order mantissa bits
1289   // could make it overflow. With ties-to-even rounding, this value would become
1290   // an infinity.
1291 
1292   // ---- Rounding Method ----
1293   //
1294   // The exponent is irrelevant in the rounding operation, so we treat the
1295   // lowest-order bit that will fit into the result ('onebit') as having
1296   // the value '1'. Similarly, the highest-order bit that won't fit into
1297   // the result ('halfbit') has the value '0.5'. The 'point' sits between
1298   // 'onebit' and 'halfbit':
1299   //
1300   //            These bits fit into the result.
1301   //               |---------------------|
1302   //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1303   //                                     ||
1304   //                                    / |
1305   //                                   /  halfbit
1306   //                               onebit
1307   //
1308   // For subnormal outputs, the range of representable bits is smaller and
1309   // the position of onebit and halfbit depends on the exponent of the
1310   // input, but the method is otherwise similar.
1311   //
1312   //   onebit(frac)
1313   //     |
1314   //     | halfbit(frac)          halfbit(adjusted)
1315   //     | /                      /
1316   //     | |                      |
1317   //  0b00.0 (exact)      -> 0b00.0 (exact)                    -> 0b00
1318   //  0b00.0...           -> 0b00.0...                         -> 0b00
1319   //  0b00.1 (exact)      -> 0b00.0111..111                    -> 0b00
1320   //  0b00.1...           -> 0b00.1...                         -> 0b01
1321   //  0b01.0 (exact)      -> 0b01.0 (exact)                    -> 0b01
1322   //  0b01.0...           -> 0b01.0...                         -> 0b01
1323   //  0b01.1 (exact)      -> 0b01.1 (exact)                    -> 0b10
1324   //  0b01.1...           -> 0b01.1...                         -> 0b10
1325   //  0b10.0 (exact)      -> 0b10.0 (exact)                    -> 0b10
1326   //  0b10.0...           -> 0b10.0...                         -> 0b10
1327   //  0b10.1 (exact)      -> 0b10.0111..111                    -> 0b10
1328   //  0b10.1...           -> 0b10.1...                         -> 0b11
1329   //  0b11.0 (exact)      -> 0b11.0 (exact)                    -> 0b11
1330   //  ...                   /             |                      /   |
1331   //                       /              |                     /    |
1332   //                                                           /     |
1333   // adjusted = frac - (halfbit(mantissa) & ~onebit(frac));   /      |
1334   //
1335   //                   mantissa = (mantissa >> shift) + halfbit(adjusted);
1336 
1337   static const int mantissa_offset = 0;
1338   static const int exponent_offset = mantissa_offset + mbits;
1339   static const int sign_offset = exponent_offset + ebits;
1340   VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
1341 
1342   // Bail out early for zero inputs.
1343   if (mantissa == 0) {
1344     return static_cast<T>(sign << sign_offset);
1345   }
1346 
1347   // If all bits in the exponent are set, the value is infinite or NaN.
1348   // This is true for all binary IEEE-754 formats.
1349   static const int infinite_exponent = (1 << ebits) - 1;
1350   static const int max_normal_exponent = infinite_exponent - 1;
1351 
1352   // Apply the exponent bias to encode it for the result. Doing this early makes
1353   // it easy to detect values that will be infinite or subnormal.
1354   exponent += max_normal_exponent >> 1;
1355 
1356   if (exponent > max_normal_exponent) {
1357     // Overflow: the input is too large for the result type to represent.
1358     if (round_mode == FPTieEven) {
1359       // FPTieEven rounding mode handles overflows using infinities.
1360       exponent = infinite_exponent;
1361       mantissa = 0;
1362     } else {
1363       VIXL_ASSERT(round_mode == FPRoundOdd);
1364       // FPRoundOdd rounding mode handles overflows using the largest magnitude
1365       // normal number.
1366       exponent = max_normal_exponent;
1367       mantissa = (UINT64_C(1) << exponent_offset) - 1;
1368     }
1369     return static_cast<T>((sign << sign_offset) |
1370                           (exponent << exponent_offset) |
1371                           (mantissa << mantissa_offset));
1372   }
1373 
1374   // Calculate the shift required to move the top mantissa bit to the proper
1375   // place in the destination type.
1376   const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
1377   int shift = highest_significant_bit - mbits;
1378 
1379   if (exponent <= 0) {
1380     // The output will be subnormal (before rounding).
1381     // For subnormal outputs, the shift must be adjusted by the exponent. The +1
1382     // is necessary because the exponent of a subnormal value (encoded as 0) is
1383     // the same as the exponent of the smallest normal value (encoded as 1).
1384     shift += static_cast<int>(-exponent + 1);
1385 
1386     // Handle inputs that would produce a zero output.
1387     //
1388     // Shifts higher than highest_significant_bit+1 will always produce a zero
1389     // result. A shift of exactly highest_significant_bit+1 might produce a
1390     // non-zero result after rounding.
1391     if (shift > (highest_significant_bit + 1)) {
1392       if (round_mode == FPTieEven) {
1393         // The result will always be +/-0.0.
1394         return static_cast<T>(sign << sign_offset);
1395       } else {
1396         VIXL_ASSERT(round_mode == FPRoundOdd);
1397         VIXL_ASSERT(mantissa != 0);
1398         // For FPRoundOdd, if the mantissa is too small to represent and
1399         // non-zero return the next "odd" value.
1400         return static_cast<T>((sign << sign_offset) | 1);
1401       }
1402     }
1403 
1404     // Properly encode the exponent for a subnormal output.
1405     exponent = 0;
1406   } else {
1407     // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
1408     // normal values.
1409     mantissa &= ~(UINT64_C(1) << highest_significant_bit);
1410   }
1411 
1412   // The casts below are only well-defined for unsigned integers.
1413   VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
1414   VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);
1415 
1416   if (shift > 0) {
1417     if (round_mode == FPTieEven) {
1418       // We have to shift the mantissa to the right. Some precision is lost, so
1419       // we need to apply rounding.
1420       uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
1421       uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
1422       uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
1423       uint64_t adjusted = mantissa - adjustment;
1424       T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
1425 
1426       T result =
1427           static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
1428                          ((mantissa >> shift) << mantissa_offset));
1429 
1430       // A very large mantissa can overflow during rounding. If this happens,
1431       // the exponent should be incremented and the mantissa set to 1.0
1432       // (encoded as 0). Applying halfbit_adjusted after assembling the float
1433       // has the nice side-effect that this case is handled for free.
1434       //
1435       // This also handles cases where a very large finite value overflows to
1436       // infinity, or where a very large subnormal value overflows to become
1437       // normal.
1438       return result + halfbit_adjusted;
1439     } else {
1440       VIXL_ASSERT(round_mode == FPRoundOdd);
1441       // If any bits at position halfbit or below are set, onebit (i.e. the
1442       // bottom bit of the resulting mantissa) must be set.
1443       uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
1444       if (fractional_bits != 0) {
1445         mantissa |= UINT64_C(1) << shift;
1446       }
1447 
1448       return static_cast<T>((sign << sign_offset) |
1449                             (exponent << exponent_offset) |
1450                             ((mantissa >> shift) << mantissa_offset));
1451     }
1452   } else {
1453     // We have to shift the mantissa to the left (or not at all). The input
1454     // mantissa is exactly representable in the output mantissa, so apply no
1455     // rounding correction.
1456     return static_cast<T>((sign << sign_offset) |
1457                           (exponent << exponent_offset) |
1458                           ((mantissa << -shift) << mantissa_offset));
1459   }
1460 }
1461 
1462 
1463 // See FPRound for a description of this function.
1464 inline double FPRoundToDouble(int64_t sign,
1465                               int64_t exponent,
1466                               uint64_t mantissa,
1467                               FPRounding round_mode) {
1468   uint64_t bits =
1469       FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
1470                                                                   exponent,
1471                                                                   mantissa,
1472                                                                   round_mode);
1473   return RawbitsToDouble(bits);
1474 }
1475 
1476 
1477 // See FPRound for a description of this function.
1478 inline Float16 FPRoundToFloat16(int64_t sign,
1479                                 int64_t exponent,
1480                                 uint64_t mantissa,
1481                                 FPRounding round_mode) {
1482   return RawbitsToFloat16(
1483       FPRound<uint16_t, kFloat16ExponentBits, kFloat16MantissaBits>(
1484           sign, exponent, mantissa, round_mode));
1485 }
1486 
1487 
1488 // See FPRound for a description of this function.
1489 static inline float FPRoundToFloat(int64_t sign,
1490                                    int64_t exponent,
1491                                    uint64_t mantissa,
1492                                    FPRounding round_mode) {
1493   uint32_t bits =
1494       FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
1495                                                                 exponent,
1496                                                                 mantissa,
1497                                                                 round_mode);
1498   return RawbitsToFloat(bits);
1499 }
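// For example, FPRoundToFloat(0, 0, 1, FPTieEven) encodes 1.0f: the single set
// mantissa bit is the implicit top bit, which carries the value 2^0.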
1500 
1501 
1502 float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
1503 float FPToFloat(double value,
1504                 FPRounding round_mode,
1505                 UseDefaultNaN DN,
1506                 bool* exception = NULL);
1507 
1508 double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
1509 double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);
1510 
1511 Float16 FPToFloat16(float value,
1512                     FPRounding round_mode,
1513                     UseDefaultNaN DN,
1514                     bool* exception = NULL);
1515 
1516 Float16 FPToFloat16(double value,
1517                     FPRounding round_mode,
1518                     UseDefaultNaN DN,
1519                     bool* exception = NULL);
1520 
1521 // Like static_cast<T>(value), but with specialisations for the Float16 type.
1522 template <typename T, typename F>
1523 T StaticCastFPTo(F value) {
1524   return static_cast<T>(value);
1525 }
1526 
1527 template <>
1528 inline float StaticCastFPTo<float, Float16>(Float16 value) {
1529   return FPToFloat(value, kIgnoreDefaultNaN);
1530 }
1531 
1532 template <>
1533 inline double StaticCastFPTo<double, Float16>(Float16 value) {
1534   return FPToDouble(value, kIgnoreDefaultNaN);
1535 }
1536 
1537 template <>
1538 inline Float16 StaticCastFPTo<Float16, float>(float value) {
1539   return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
1540 }
1541 
1542 template <>
1543 inline Float16 StaticCastFPTo<Float16, double>(double value) {
1544   return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN);
1545 }
1546 
1547 template <typename T>
1548 uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) {
1549   switch (size_in_bits) {
1550     case 16:
1551       return Float16ToRawbits(StaticCastFPTo<Float16>(value));
1552     case 32:
1553       return FloatToRawbits(StaticCastFPTo<float>(value));
1554     case 64:
1555       return DoubleToRawbits(StaticCastFPTo<double>(value));
1556   }
1557   VIXL_UNREACHABLE();
1558   return 0;
1559 }
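// Example: FPToRawbitsWithSize(32, 1.0f) returns 0x3f800000, the IEEE-754
// single-precision encoding of 1.0; RawbitsWithSizeToFP (below) performs the
// inverse conversion.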
1560 
1561 template <typename T>
1562 T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) {
1563   VIXL_ASSERT(IsUintN(size_in_bits, value));
1564   switch (size_in_bits) {
1565     case 16:
1566       return StaticCastFPTo<T>(RawbitsToFloat16(static_cast<uint16_t>(value)));
1567     case 32:
1568       return StaticCastFPTo<T>(RawbitsToFloat(static_cast<uint32_t>(value)));
1569     case 64:
1570       return StaticCastFPTo<T>(RawbitsToDouble(value));
1571   }
1572   VIXL_UNREACHABLE();
1573   return 0;
1574 }
1575 
1576 // Jenkins one-at-a-time hash, based on
1577 // https://en.wikipedia.org/wiki/Jenkins_hash_function citing
1578 // https://www.drdobbs.com/database/algorithm-alley/184410284.
1579 constexpr uint32_t Hash(const char* str, uint32_t hash = 0) {
1580   if (*str == '\0') {
1581     hash += hash << 3;
1582     hash ^= hash >> 11;
1583     hash += hash << 15;
1584     return hash;
1585   } else {
1586     hash += *str;
1587     hash += hash << 10;
1588     hash ^= hash >> 6;
1589     return Hash(str + 1, hash);
1590   }
1591 }
1592 
1593 constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); }
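// Usage sketch (hypothetical call site): the "_h" literal allows dispatching on
// hashed strings, for example:
//   switch (Hash(mnemonic)) {
//     case "add"_h: /* handle add */ break;
//     case "sub"_h: /* handle sub */ break;
//   }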
1594 
1595 }  // namespace vixl
1596 
1597 #endif  // VIXL_UTILS_H
1598