• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_SIMULATOR_AARCH64_H_
28 #define VIXL_AARCH64_SIMULATOR_AARCH64_H_
29 
30 #include <memory>
31 #include <mutex>
32 #include <random>
33 #include <unordered_map>
34 #include <vector>
35 
36 #include "../cpu-features.h"
37 #include "../globals-vixl.h"
38 #include "../utils-vixl.h"
39 
40 #include "abi-aarch64.h"
41 #include "cpu-features-auditor-aarch64.h"
42 #include "debugger-aarch64.h"
43 #include "disasm-aarch64.h"
44 #include "instructions-aarch64.h"
45 #include "simulator-constants-aarch64.h"
46 
47 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
48 
49 // These are only used for the ABI feature, and depend on checks performed for
50 // it.
51 #ifdef VIXL_HAS_ABI_SUPPORT
52 #include <tuple>
53 #if __cplusplus >= 201402L
54 // Required for `std::index_sequence`
55 #include <utility>
56 #endif
57 #endif
58 
// The hosts that the Simulator runs on may not have these flags defined.
60 #ifndef PROT_BTI
61 #define PROT_BTI 0x10
62 #endif
63 #ifndef PROT_MTE
64 #define PROT_MTE 0x20
65 #endif
66 
67 namespace vixl {
68 namespace aarch64 {
69 
70 class Simulator;
71 struct RuntimeCallStructHelper;
72 
// Result reported by TryMemoryAccess() and the other guarded memory access
// helpers declared in this file.
enum class MemoryAccessResult {
  Success = 0,
  Failure = 1
};
74 
// Try to access a piece of memory at the given address. Accessing that memory
// might raise a signal which, if handled by a custom signal handler, should
// set up the native and simulated context in order to continue. Return whether
// the memory access failed (i.e. raised a signal) or succeeded.
79 MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size);
80 
81 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
// Access a byte of memory from the address at the given offset. If the memory
// could be accessed then return MemoryAccessResult::Success. If the memory
// could not be accessed, and therefore raised a signal, set up the simulated
// context and return MemoryAccessResult::Failure.
//
// If a signal is raised then it is expected that the signal handler will place
// MemoryAccessResult::Failure in the native return register and the address of
// _vixl_internal_AccessMemory_continue into the native instruction pointer.
90 extern "C" MemoryAccessResult _vixl_internal_ReadMemory(uintptr_t address,
91                                                         uintptr_t offset);
92 extern "C" uintptr_t _vixl_internal_AccessMemory_continue();
93 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
94 
95 class SimStack {
96  public:
SimStack()97   SimStack() {}
SimStack(size_t size)98   explicit SimStack(size_t size) : usable_size_(size) {}
99 
100   // Guard against accesses above the stack base. This could occur, for example,
101   // if the first simulated function tries to read stack arguments that haven't
102   // been properly initialised in the Simulator's stack.
SetBaseGuardSize(size_t size)103   void SetBaseGuardSize(size_t size) { base_guard_size_ = size; }
104 
105   // Guard against stack overflows. The size should be large enough to detect
106   // the largest stride made (by `MacroAssembler::Claim()` or equivalent) whilst
107   // initialising stack objects.
SetLimitGuardSize(size_t size)108   void SetLimitGuardSize(size_t size) { limit_guard_size_ = size; }
109 
110   // The minimum usable size of the stack.
111   // Equal to "stack base" - "stack limit", in AAPCS64 terminology.
SetUsableSize(size_t size)112   void SetUsableSize(size_t size) { usable_size_ = size; }
113 
114   // Set the minimum alignment for the stack parameters.
AlignToBytesLog2(int align_log2)115   void AlignToBytesLog2(int align_log2) { align_log2_ = align_log2; }
116 
117   class Allocated {
118    public:
119     // Using AAPCS64 terminology, highest addresses at the top:
120     //
121     //  data_.get() + alloc_size ->
122     //                              |
123     //                              | Base guard
124     //                 GetBase() -> |                  |
125     //                                |                |
126     //                                |                | AAPCS64-legal
127     //                                | Usable stack   | values of 'sp'.
128     //                                |                |
129     //                                |                |
130     //                GetLimit() -> |
131     //                              | Limit guard
132     //               data_.get() -> |
133     //
134     // The Simulator detects (and forbids) accesses to either guard region.
135 
GetBase()136     char* GetBase() const { return base_; }
GetLimit()137     char* GetLimit() const { return limit_; }
138 
139     template <typename T>
IsAccessInGuardRegion(const T * base,size_t size)140     bool IsAccessInGuardRegion(const T* base, size_t size) const {
141       VIXL_ASSERT(size > 0);
142       // Inclusive bounds.
143       const char* start = reinterpret_cast<const char*>(base);
144       const char* end = start + size - 1;
145       const char* data_start = data_.get();
146       const char* data_end = data_start + alloc_size_ - 1;
147       bool in_base_guard = (start <= data_end) && (end >= base_);
148       bool in_limit_guard = (start <= limit_) && (end >= data_start);
149       return in_base_guard || in_limit_guard;
150     }
151 
152    private:
153     std::unique_ptr<char[]> data_;
154     char* limit_;
155     char* base_;
156     size_t alloc_size_;
157 
158     friend class SimStack;
159   };
160 
161   // Allocate the stack, locking the parameters.
Allocate()162   Allocated Allocate() {
163     size_t align_to = uint64_t{1} << align_log2_;
164     size_t l = AlignUp(limit_guard_size_, align_to);
165     size_t u = AlignUp(usable_size_, align_to);
166     size_t b = AlignUp(base_guard_size_, align_to);
167     size_t size = l + u + b;
168 
169     Allocated a;
170     size_t alloc_size = (align_to - 1) + size;
171     a.data_ = std::make_unique<char[]>(alloc_size);
172     void* data = a.data_.get();
173     auto data_aligned =
174         reinterpret_cast<char*>(std::align(align_to, size, data, alloc_size));
175     a.limit_ = data_aligned + l - 1;
176     a.base_ = data_aligned + l + u;
177     a.alloc_size_ = alloc_size;
178     return a;
179   }
180 
181  private:
182   size_t base_guard_size_ = 256;
183   size_t limit_guard_size_ = 4 * 1024;
184   size_t usable_size_ = 8 * 1024;
185   size_t align_log2_ = 4;
186 
187   static const size_t kDefaultBaseGuardSize = 256;
188   static const size_t kDefaultLimitGuardSize = 4 * 1024;
189   static const size_t kDefaultUsableSize = 8 * 1024;
190 };
191 
192 // Armv8.5 MTE helpers.
GetAllocationTagFromAddress(uint64_t address)193 inline int GetAllocationTagFromAddress(uint64_t address) {
194   return static_cast<int>(ExtractUnsignedBitfield64(59, 56, address));
195 }
196 
197 template <typename T>
AddressUntag(T address)198 T AddressUntag(T address) {
199   // Cast the address using a C-style cast. A reinterpret_cast would be
200   // appropriate, but it can't cast one integral type to another.
201   uint64_t bits = (uint64_t)address;
202   return (T)(bits & ~kAddressTagMask);
203 }
204 
// Type of the callback stored in a BranchInterception entry. It takes the
// address of the intercepted function as a uint64_t and returns nothing. The
// callback is optional (BranchInterception defaults it to nullptr); per the
// BranchInterception documentation, a runtime call is performed on the
// intercepted function when no callback is supplied. For usage see
// BranchInterception.
using InterceptionCallback = std::function<void(uint64_t)>;
210 
211 class MetaDataDepot {
212  public:
213   class MetaDataMTE {
214    public:
MetaDataMTE(int tag)215     explicit MetaDataMTE(int tag) : tag_(tag) {}
216 
GetTag()217     int GetTag() const { return tag_; }
SetTag(int tag)218     void SetTag(int tag) {
219       VIXL_ASSERT(IsUint4(tag));
220       tag_ = tag;
221     }
222 
IsActive()223     static bool IsActive() { return is_active; }
SetActive(bool value)224     static void SetActive(bool value) { is_active = value; }
225 
226    private:
227     static bool is_active;
228     int16_t tag_;
229 
230     friend class MetaDataDepot;
231   };
232 
233   // Generate a key for metadata recording from a untagged address.
234   template <typename T>
GenerateMTEkey(T address)235   uint64_t GenerateMTEkey(T address) const {
236     // Cast the address using a C-style cast. A reinterpret_cast would be
237     // appropriate, but it can't cast one integral type to another.
238     return (uint64_t)(AddressUntag(address)) >> kMTETagGranuleInBytesLog2;
239   }
240 
241   template <typename R, typename T>
GetAttribute(T map,uint64_t key)242   R GetAttribute(T map, uint64_t key) {
243     auto pair = map->find(key);
244     R value = (pair == map->end()) ? nullptr : &pair->second;
245     return value;
246   }
247 
248   template <typename T>
249   int GetMTETag(T address, Instruction const* pc = nullptr) {
250     uint64_t key = GenerateMTEkey(address);
251     MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);
252 
253     if (!m) {
254       std::stringstream sstream;
255       sstream << std::hex << "MTE ERROR : instruction at 0x"
256               << reinterpret_cast<uint64_t>(pc)
257               << " touched a unallocated memory location 0x"
258               << (uint64_t)(address) << ".\n";
259       VIXL_ABORT_WITH_MSG(sstream.str().c_str());
260     }
261 
262     return m->GetTag();
263   }
264 
265   template <typename T>
266   void SetMTETag(T address, int tag, Instruction const* pc = nullptr) {
267     VIXL_ASSERT(IsAligned((uintptr_t)address, kMTETagGranuleInBytes));
268     uint64_t key = GenerateMTEkey(address);
269     MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);
270 
271     if (!m) {
272       metadata_mte_.insert({key, MetaDataMTE(tag)});
273     } else {
274       // Overwrite
275       if (m->GetTag() == tag) {
276         std::stringstream sstream;
277         sstream << std::hex << "MTE WARNING : instruction at 0x"
278                 << reinterpret_cast<uint64_t>(pc)
279                 << ", the same tag is assigned to the address 0x"
280                 << (uint64_t)(address) << ".\n";
281         VIXL_WARNING(sstream.str().c_str());
282       }
283       m->SetTag(tag);
284     }
285   }
286 
287   template <typename T>
CleanMTETag(T address)288   size_t CleanMTETag(T address) {
289     VIXL_ASSERT(
290         IsAligned(reinterpret_cast<uintptr_t>(address), kMTETagGranuleInBytes));
291     uint64_t key = GenerateMTEkey(address);
292     return metadata_mte_.erase(key);
293   }
294 
GetTotalCountMTE()295   size_t GetTotalCountMTE() { return metadata_mte_.size(); }
296 
297   // A pure virtual struct that allows the templated BranchInterception struct
298   // to be stored. For more information see BranchInterception.
299   struct BranchInterceptionAbstract {
~BranchInterceptionAbstractBranchInterceptionAbstract300     virtual ~BranchInterceptionAbstract() {}
301     // Call the callback_ if one exists, otherwise do a RuntimeCall.
302     virtual void operator()(Simulator* simulator) const = 0;
303   };
304 
305   // An entry denoting a function to intercept when branched to during
306   // simulator execution. When a function is intercepted the callback will be
307   // called if one exists otherwise the function will be passed to
308   // RuntimeCall.
309   template <typename R, typename... P>
310   struct BranchInterception : public BranchInterceptionAbstract {
311     BranchInterception(R (*function)(P...),
312                        InterceptionCallback callback = nullptr)
function_BranchInterception313         : function_(function), callback_(callback) {}
314 
315     void operator()(Simulator* simulator) const VIXL_OVERRIDE;
316 
317    private:
318     // Pointer to the function that will be intercepted.
319     R (*function_)(P...);
320 
321     // Function to be called instead of function_
322     InterceptionCallback callback_;
323   };
324 
325   // Register a new BranchInterception object. If 'function' is branched to
326   // (e.g: "blr function") in the future; instead, if provided, 'callback' will
327   // be called otherwise a runtime call will be performed on 'function'.
328   //
329   // For example: this can be used to always perform runtime calls on
330   // non-AArch64 functions without using the macroassembler.
331   //
332   // Note: only unconditional branches to registers are currently supported to
333   // be intercepted, e.g: "br"/"blr".
334   //
335   // TODO: support intercepting other branch types.
336   template <typename R, typename... P>
337   void RegisterBranchInterception(R (*function)(P...),
338                                   InterceptionCallback callback = nullptr) {
339     uintptr_t addr = reinterpret_cast<uintptr_t>(function);
340     std::unique_ptr<BranchInterceptionAbstract> intercept =
341         std::make_unique<BranchInterception<R, P...>>(function, callback);
342     branch_interceptions_.insert(std::make_pair(addr, std::move(intercept)));
343   }
344 
345   // Search for branch interceptions to the branch_target address; If one is
346   // found return it otherwise return nullptr.
FindBranchInterception(uint64_t branch_target)347   BranchInterceptionAbstract* FindBranchInterception(uint64_t branch_target) {
348     // Check for interceptions to the target address, if one is found, call it.
349     auto search = branch_interceptions_.find(branch_target);
350     if (search != branch_interceptions_.end()) {
351       return search->second.get();
352     } else {
353       return nullptr;
354     }
355   }
356 
ResetState()357   void ResetState() { branch_interceptions_.clear(); }
358 
359  private:
360   // Tag recording of each allocated memory in the tag-granule.
361   std::unordered_map<uint64_t, class MetaDataMTE> metadata_mte_;
362 
363   // Store a map of addresses to be intercepted and their corresponding branch
364   // interception object, see 'BranchInterception'.
365   std::unordered_map<uintptr_t, std::unique_ptr<BranchInterceptionAbstract>>
366       branch_interceptions_;
367 };
368 
369 
370 // Representation of memory, with typed getters and setters for access.
371 class Memory {
372  public:
Memory(SimStack::Allocated stack)373   explicit Memory(SimStack::Allocated stack) : stack_(std::move(stack)) {
374     metadata_depot_ = nullptr;
375   }
376 
GetStack()377   const SimStack::Allocated& GetStack() { return stack_; }
378 
379   template <typename A>
380   bool IsMTETagsMatched(A address, Instruction const* pc = nullptr) const {
381     if (MetaDataDepot::MetaDataMTE::IsActive()) {
382       // Cast the address using a C-style cast. A reinterpret_cast would be
383       // appropriate, but it can't cast one integral type to another.
384       uint64_t addr = (uint64_t)address;
385       int pointer_tag = GetAllocationTagFromAddress(addr);
386       int memory_tag = metadata_depot_->GetMTETag(AddressUntag(addr), pc);
387       return pointer_tag == memory_tag;
388     }
389     return true;
390   }
391 
392   template <typename T, typename A>
393   std::optional<T> Read(A address, Instruction const* pc = nullptr) const {
394     T value;
395     VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
396                        (sizeof(value) == 4) || (sizeof(value) == 8) ||
397                        (sizeof(value) == 16));
398     auto base = reinterpret_cast<const char*>(AddressUntag(address));
399     if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
400       VIXL_ABORT_WITH_MSG("Attempt to read from stack guard region");
401     }
402     if (!IsMTETagsMatched(address, pc)) {
403       VIXL_ABORT_WITH_MSG("Tag mismatch.");
404     }
405     if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
406         MemoryAccessResult::Failure) {
407       return std::nullopt;
408     }
409     memcpy(&value, base, sizeof(value));
410     return value;
411   }
412 
413   template <typename T, typename A>
414   bool Write(A address, T value, Instruction const* pc = nullptr) const {
415     VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
416                        (sizeof(value) == 4) || (sizeof(value) == 8) ||
417                        (sizeof(value) == 16));
418     auto base = reinterpret_cast<char*>(AddressUntag(address));
419     if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
420       VIXL_ABORT_WITH_MSG("Attempt to write to stack guard region");
421     }
422     if (!IsMTETagsMatched(address, pc)) {
423       VIXL_ABORT_WITH_MSG("Tag mismatch.");
424     }
425     if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
426         MemoryAccessResult::Failure) {
427       return false;
428     }
429     memcpy(base, &value, sizeof(value));
430     return true;
431   }
432 
433   template <typename A>
ReadUint(int size_in_bytes,A address)434   std::optional<uint64_t> ReadUint(int size_in_bytes, A address) const {
435     switch (size_in_bytes) {
436       case 1:
437         return Read<uint8_t>(address);
438       case 2:
439         return Read<uint16_t>(address);
440       case 4:
441         return Read<uint32_t>(address);
442       case 8:
443         return Read<uint64_t>(address);
444     }
445     VIXL_UNREACHABLE();
446     return 0;
447   }
448 
449   template <typename A>
ReadInt(int size_in_bytes,A address)450   std::optional<int64_t> ReadInt(int size_in_bytes, A address) const {
451     switch (size_in_bytes) {
452       case 1:
453         return Read<int8_t>(address);
454       case 2:
455         return Read<int16_t>(address);
456       case 4:
457         return Read<int32_t>(address);
458       case 8:
459         return Read<int64_t>(address);
460     }
461     VIXL_UNREACHABLE();
462     return 0;
463   }
464 
465   template <typename A>
Write(int size_in_bytes,A address,uint64_t value)466   bool Write(int size_in_bytes, A address, uint64_t value) const {
467     switch (size_in_bytes) {
468       case 1:
469         return Write(address, static_cast<uint8_t>(value));
470       case 2:
471         return Write(address, static_cast<uint16_t>(value));
472       case 4:
473         return Write(address, static_cast<uint32_t>(value));
474       case 8:
475         return Write(address, value);
476     }
477     VIXL_UNREACHABLE();
478     return false;
479   }
480 
AppendMetaData(MetaDataDepot * metadata_depot)481   void AppendMetaData(MetaDataDepot* metadata_depot) {
482     VIXL_ASSERT(metadata_depot != nullptr);
483     VIXL_ASSERT(metadata_depot_ == nullptr);
484     metadata_depot_ = metadata_depot;
485   }
486 
487  private:
488   SimStack::Allocated stack_;
489   MetaDataDepot* metadata_depot_;
490 };
491 
492 // Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
493 template <unsigned kMaxSizeInBits>
494 class SimRegisterBase {
495  public:
496   static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte;
497   VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits);
498 
SimRegisterBase()499   SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); }
500 
GetSizeInBits()501   unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; }
GetSizeInBytes()502   unsigned GetSizeInBytes() const { return size_in_bytes_; }
503 
SetSizeInBytes(unsigned size_in_bytes)504   void SetSizeInBytes(unsigned size_in_bytes) {
505     VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes);
506     size_in_bytes_ = size_in_bytes;
507   }
SetSizeInBits(unsigned size_in_bits)508   void SetSizeInBits(unsigned size_in_bits) {
509     VIXL_ASSERT(size_in_bits <= kMaxSizeInBits);
510     VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
511     SetSizeInBytes(size_in_bits / kBitsPerByte);
512   }
513 
514   // Write the specified value. The value is zero-extended if necessary.
515   template <typename T>
Write(T new_value)516   void Write(T new_value) {
517     // All AArch64 registers are zero-extending.
518     if (sizeof(new_value) < GetSizeInBytes()) Clear();
519     WriteLane(new_value, 0);
520     NotifyRegisterWrite();
521   }
522   template <typename T>
Set(T new_value)523   VIXL_DEPRECATED("Write", void Set(T new_value)) {
524     Write(new_value);
525   }
526 
Clear()527   void Clear() {
528     memset(value_, 0, kMaxSizeInBytes);
529     NotifyRegisterWrite();
530   }
531 
532   // Insert a typed value into a register, leaving the rest of the register
533   // unchanged. The lane parameter indicates where in the register the value
534   // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
535   // 0 represents the least significant bits.
536   template <typename T>
Insert(int lane,T new_value)537   void Insert(int lane, T new_value) {
538     WriteLane(new_value, lane);
539     NotifyRegisterWrite();
540   }
541 
542   // Get the value as the specified type. The value is truncated if necessary.
543   template <typename T>
Get()544   T Get() const {
545     return GetLane<T>(0);
546   }
547 
548   // Get the lane value as the specified type. The value is truncated if
549   // necessary.
550   template <typename T>
GetLane(int lane)551   T GetLane(int lane) const {
552     T result;
553     ReadLane(&result, lane);
554     return result;
555   }
556   template <typename T>
557   VIXL_DEPRECATED("GetLane", T Get(int lane) const) {
558     return GetLane(lane);
559   }
560 
561   // Get the value of a specific bit, indexed from the least-significant bit of
562   // lane 0.
GetBit(int bit)563   bool GetBit(int bit) const {
564     int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
565     int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
566     return ((value_[byte] >> bit_in_byte) & 1) != 0;
567   }
568 
569   // Return a pointer to the raw, underlying byte array.
GetBytes()570   const uint8_t* GetBytes() const { return value_; }
571 
572   // TODO: Make this return a map of updated bytes, so that we can highlight
573   // updated lanes for load-and-insert. (That never happens for scalar code, but
574   // NEON has some instructions that can update individual lanes.)
WrittenSinceLastLog()575   bool WrittenSinceLastLog() const { return written_since_last_log_; }
576 
NotifyRegisterLogged()577   void NotifyRegisterLogged() { written_since_last_log_ = false; }
578 
579  protected:
580   uint8_t value_[kMaxSizeInBytes];
581 
582   unsigned size_in_bytes_;
583 
584   // Helpers to aid with register tracing.
585   bool written_since_last_log_;
586 
NotifyRegisterWrite()587   void NotifyRegisterWrite() { written_since_last_log_ = true; }
588 
589  private:
590   template <typename T>
ReadLane(T * dst,int lane)591   void ReadLane(T* dst, int lane) const {
592     VIXL_ASSERT(lane >= 0);
593     VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
594     memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
595   }
596 
597   template <typename T>
WriteLane(T src,int lane)598   void WriteLane(T src, int lane) {
599     VIXL_ASSERT(lane >= 0);
600     VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
601     memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
602   }
603 
604   // The default ReadLane and WriteLane methods assume what we are copying is
605   // "trivially copyable" by using memcpy. We have to provide alternative
606   // implementations for SimFloat16 which cannot be copied this way.
607 
ReadLane(vixl::internal::SimFloat16 * dst,int lane)608   void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
609     uint16_t rawbits;
610     ReadLane(&rawbits, lane);
611     *dst = RawbitsToFloat16(rawbits);
612   }
613 
WriteLane(vixl::internal::SimFloat16 src,int lane)614   void WriteLane(vixl::internal::SimFloat16 src, int lane) {
615     WriteLane(Float16ToRawbits(src), lane);
616   }
617 };
618 
619 typedef SimRegisterBase<kXRegSize> SimRegister;      // r0-r31
620 typedef SimRegisterBase<kPRegMaxSize> SimPRegister;  // p0-p15
621 // FFR has the same format as a predicate register.
622 typedef SimPRegister SimFFRRegister;
623 
624 // v0-v31 and z0-z31
625 class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
626  public:
SimVRegister()627   SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {}
628 
NotifyAccessAsZ()629   void NotifyAccessAsZ() { accessed_as_z_ = true; }
630 
NotifyRegisterLogged()631   void NotifyRegisterLogged() {
632     SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
633     accessed_as_z_ = false;
634   }
635 
AccessedAsZSinceLastLog()636   bool AccessedAsZSinceLastLog() const { return accessed_as_z_; }
637 
638  private:
639   bool accessed_as_z_;
640 };
641 
642 // Representation of a SVE predicate register.
643 class LogicPRegister {
644  public:
LogicPRegister(SimPRegister & other)645   inline LogicPRegister(
646       SimPRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
647       : register_(other) {}
648 
649   // Set a conveniently-sized block to 16 bits as the minimum predicate length
650   // is 16 bits and allow to be increased to multiples of 16 bits.
651   typedef uint16_t ChunkType;
652 
653   // Assign a bit into the end positon of the specified lane.
654   // The bit is zero-extended if necessary.
SetActive(VectorFormat vform,int lane_index,bool value)655   void SetActive(VectorFormat vform, int lane_index, bool value) {
656     int psize = LaneSizeInBytesFromFormat(vform);
657     int bit_index = lane_index * psize;
658     int byte_index = bit_index / kBitsPerByte;
659     int bit_offset = bit_index % kBitsPerByte;
660     uint8_t byte = register_.GetLane<uint8_t>(byte_index);
661     register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
662   }
663 
IsActive(VectorFormat vform,int lane_index)664   bool IsActive(VectorFormat vform, int lane_index) const {
665     int psize = LaneSizeInBytesFromFormat(vform);
666     int bit_index = lane_index * psize;
667     int byte_index = bit_index / kBitsPerByte;
668     int bit_offset = bit_index % kBitsPerByte;
669     uint8_t byte = register_.GetLane<uint8_t>(byte_index);
670     return ExtractBit(byte, bit_offset);
671   }
672 
673   // The accessors for bulk processing.
GetChunkCount()674   int GetChunkCount() const {
675     VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
676     return register_.GetSizeInBytes() / sizeof(ChunkType);
677   }
678 
GetChunk(int lane)679   ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }
680 
SetChunk(int lane,ChunkType new_value)681   void SetChunk(int lane, ChunkType new_value) {
682     SetActiveMask(lane, new_value);
683   }
684 
SetAllBits()685   void SetAllBits() {
686     int chunk_size = sizeof(ChunkType) * kBitsPerByte;
687     ChunkType bits = static_cast<ChunkType>(GetUintMask(chunk_size));
688     for (int lane = 0;
689          lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
690          lane++) {
691       SetChunk(lane, bits);
692     }
693   }
694 
695   template <typename T>
GetActiveMask(int lane)696   T GetActiveMask(int lane) const {
697     return register_.GetLane<T>(lane);
698   }
699 
700   template <typename T>
SetActiveMask(int lane,T new_value)701   void SetActiveMask(int lane, T new_value) {
702     register_.Insert<T>(lane, new_value);
703   }
704 
Clear()705   void Clear() { register_.Clear(); }
706 
Aliases(const LogicPRegister & other)707   bool Aliases(const LogicPRegister& other) const {
708     return &register_ == &other.register_;
709   }
710 
711  private:
712   // The bit assignment is zero-extended to fill the size of predicate element.
ZeroExtend(uint8_t byte,int index,int psize,bool value)713   uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
714     VIXL_ASSERT(index >= 0);
715     VIXL_ASSERT(index + psize <= kBitsPerByte);
716     int bits = value ? 1 : 0;
717     switch (psize) {
718       case 1:
719         AssignBit(byte, index, bits);
720         break;
721       case 2:
722         AssignBits(byte, index, 0x03, bits);
723         break;
724       case 4:
725         AssignBits(byte, index, 0x0f, bits);
726         break;
727       case 8:
728         AssignBits(byte, index, 0xff, bits);
729         break;
730       default:
731         VIXL_UNREACHABLE();
732         return 0;
733     }
734     return byte;
735   }
736 
737   SimPRegister& register_;
738 };
739 
// A 128-bit unsigned quantity represented as a pair of 64-bit words.
// NOTE(review): which element holds the most significant half is not
// established at this point in the file - confirm against the users of this
// alias.
using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
741 
742 // Representation of a vector register, with typed getters and setters for lanes
743 // and additional information to represent lane state.
744 class LogicVRegister {
745  public:
LogicVRegister(SimVRegister & other)746   inline LogicVRegister(
747       SimVRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
748       : register_(other) {
749     for (size_t i = 0; i < ArrayLength(saturated_); i++) {
750       saturated_[i] = kNotSaturated;
751     }
752     for (size_t i = 0; i < ArrayLength(round_); i++) {
753       round_[i] = 0;
754     }
755   }
756 
Int(VectorFormat vform,int index)757   int64_t Int(VectorFormat vform, int index) const {
758     if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
759     int64_t element;
760     switch (LaneSizeInBitsFromFormat(vform)) {
761       case 8:
762         element = register_.GetLane<int8_t>(index);
763         break;
764       case 16:
765         element = register_.GetLane<int16_t>(index);
766         break;
767       case 32:
768         element = register_.GetLane<int32_t>(index);
769         break;
770       case 64:
771         element = register_.GetLane<int64_t>(index);
772         break;
773       default:
774         VIXL_UNREACHABLE();
775         return 0;
776     }
777     return element;
778   }
779 
Uint(VectorFormat vform,int index)780   uint64_t Uint(VectorFormat vform, int index) const {
781     if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
782     uint64_t element;
783     switch (LaneSizeInBitsFromFormat(vform)) {
784       case 8:
785         element = register_.GetLane<uint8_t>(index);
786         break;
787       case 16:
788         element = register_.GetLane<uint16_t>(index);
789         break;
790       case 32:
791         element = register_.GetLane<uint32_t>(index);
792         break;
793       case 64:
794         element = register_.GetLane<uint64_t>(index);
795         break;
796       default:
797         VIXL_UNREACHABLE();
798         return 0;
799     }
800     return element;
801   }
802 
UintArray(VectorFormat vform,uint64_t * dst)803   int UintArray(VectorFormat vform, uint64_t* dst) const {
804     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
805       dst[i] = Uint(vform, i);
806     }
807     return LaneCountFromFormat(vform);
808   }
809 
UintLeftJustified(VectorFormat vform,int index)810   uint64_t UintLeftJustified(VectorFormat vform, int index) const {
811     return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
812   }
813 
IntLeftJustified(VectorFormat vform,int index)814   int64_t IntLeftJustified(VectorFormat vform, int index) const {
815     uint64_t value = UintLeftJustified(vform, index);
816     int64_t result;
817     memcpy(&result, &value, sizeof(result));
818     return result;
819   }
820 
SetInt(VectorFormat vform,int index,int64_t value)821   void SetInt(VectorFormat vform, int index, int64_t value) const {
822     if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
823     switch (LaneSizeInBitsFromFormat(vform)) {
824       case 8:
825         register_.Insert(index, static_cast<int8_t>(value));
826         break;
827       case 16:
828         register_.Insert(index, static_cast<int16_t>(value));
829         break;
830       case 32:
831         register_.Insert(index, static_cast<int32_t>(value));
832         break;
833       case 64:
834         register_.Insert(index, static_cast<int64_t>(value));
835         break;
836       default:
837         VIXL_UNREACHABLE();
838         return;
839     }
840   }
841 
SetIntArray(VectorFormat vform,const int64_t * src)842   void SetIntArray(VectorFormat vform, const int64_t* src) const {
843     ClearForWrite(vform);
844     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
845       SetInt(vform, i, src[i]);
846     }
847   }
848 
SetUint(VectorFormat vform,int index,uint64_t value)849   void SetUint(VectorFormat vform, int index, uint64_t value) const {
850     if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
851     switch (LaneSizeInBitsFromFormat(vform)) {
852       case 8:
853         register_.Insert(index, static_cast<uint8_t>(value));
854         break;
855       case 16:
856         register_.Insert(index, static_cast<uint16_t>(value));
857         break;
858       case 32:
859         register_.Insert(index, static_cast<uint32_t>(value));
860         break;
861       case 64:
862         register_.Insert(index, static_cast<uint64_t>(value));
863         break;
864       default:
865         VIXL_UNREACHABLE();
866         return;
867     }
868   }
869 
SetUint(VectorFormat vform,int index,vixl_uint128_t value)870   void SetUint(VectorFormat vform, int index, vixl_uint128_t value) const {
871     if (LaneSizeInBitsFromFormat(vform) <= 64) {
872       SetUint(vform, index, value.second);
873       return;
874     }
875     VIXL_ASSERT((vform == kFormat1Q) || (vform == kFormatVnQ));
876     SetUint(kFormatVnD, 2 * index, value.second);
877     SetUint(kFormatVnD, 2 * index + 1, value.first);
878   }
879 
SetUintArray(VectorFormat vform,const uint64_t * src)880   void SetUintArray(VectorFormat vform, const uint64_t* src) const {
881     ClearForWrite(vform);
882     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
883       SetUint(vform, i, src[i]);
884     }
885   }
886 
887   template <typename T>
Float(int index)888   T Float(int index) const {
889     return register_.GetLane<T>(index);
890   }
891 
892   template <typename T>
SetFloat(int index,T value)893   void SetFloat(int index, T value) const {
894     register_.Insert(index, value);
895   }
896 
897   template <typename T>
SetFloat(VectorFormat vform,int index,T value)898   void SetFloat(VectorFormat vform, int index, T value) const {
899     if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
900     register_.Insert(index, value);
901   }
902 
Clear()903   void Clear() { register_.Clear(); }
904 
905   // When setting a result in a register larger than the result itself, the top
906   // bits of the register must be cleared.
ClearForWrite(VectorFormat vform)907   void ClearForWrite(VectorFormat vform) const {
908     // SVE destinations write whole registers, so we have nothing to clear.
909     if (IsSVEFormat(vform)) return;
910 
911     unsigned size = RegisterSizeInBytesFromFormat(vform);
912     for (unsigned i = size; i < register_.GetSizeInBytes(); i++) {
913       SetUint(kFormat16B, i, 0);
914     }
915   }
916 
917   // Saturation state for each lane of a vector.
918   enum Saturation {
919     kNotSaturated = 0,
920     kSignedSatPositive = 1 << 0,
921     kSignedSatNegative = 1 << 1,
922     kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
923     kSignedSatUndefined = kSignedSatMask,
924     kUnsignedSatPositive = 1 << 2,
925     kUnsignedSatNegative = 1 << 3,
926     kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
927     kUnsignedSatUndefined = kUnsignedSatMask
928   };
929 
930   // Getters for saturation state.
GetSignedSaturation(int index)931   Saturation GetSignedSaturation(int index) {
932     return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
933   }
934 
GetUnsignedSaturation(int index)935   Saturation GetUnsignedSaturation(int index) {
936     return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
937   }
938 
939   // Setters for saturation state.
ClearSat(int index)940   void ClearSat(int index) { saturated_[index] = kNotSaturated; }
941 
SetSignedSat(int index,bool positive)942   void SetSignedSat(int index, bool positive) {
943     SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
944   }
945 
SetUnsignedSat(int index,bool positive)946   void SetUnsignedSat(int index, bool positive) {
947     SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
948   }
949 
SetSatFlag(int index,Saturation sat)950   void SetSatFlag(int index, Saturation sat) {
951     saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
952     VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
953     VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
954   }
955 
956   // Saturate lanes of a vector based on saturation state.
SignedSaturate(VectorFormat vform)957   LogicVRegister& SignedSaturate(VectorFormat vform) {
958     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
959       Saturation sat = GetSignedSaturation(i);
960       if (sat == kSignedSatPositive) {
961         SetInt(vform, i, MaxIntFromFormat(vform));
962       } else if (sat == kSignedSatNegative) {
963         SetInt(vform, i, MinIntFromFormat(vform));
964       }
965     }
966     return *this;
967   }
968 
UnsignedSaturate(VectorFormat vform)969   LogicVRegister& UnsignedSaturate(VectorFormat vform) {
970     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
971       Saturation sat = GetUnsignedSaturation(i);
972       if (sat == kUnsignedSatPositive) {
973         SetUint(vform, i, MaxUintFromFormat(vform));
974       } else if (sat == kUnsignedSatNegative) {
975         SetUint(vform, i, 0);
976       }
977     }
978     return *this;
979   }
980 
981   // Getter for rounding state.
GetRounding(int index)982   bool GetRounding(int index) { return round_[index]; }
983 
984   // Setter for rounding state.
SetRounding(int index,bool round)985   void SetRounding(int index, bool round) { round_[index] = round; }
986 
987   // Round lanes of a vector based on rounding state.
Round(VectorFormat vform)988   LogicVRegister& Round(VectorFormat vform) {
989     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
990       SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0));
991     }
992     return *this;
993   }
994 
995   // Unsigned halve lanes of a vector, and use the saturation state to set the
996   // top bit.
Uhalve(VectorFormat vform)997   LogicVRegister& Uhalve(VectorFormat vform) {
998     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
999       uint64_t val = Uint(vform, i);
1000       SetRounding(i, (val & 1) == 1);
1001       val >>= 1;
1002       if (GetUnsignedSaturation(i) != kNotSaturated) {
1003         // If the operation causes unsigned saturation, the bit shifted into the
1004         // most significant bit must be set.
1005         val |= (MaxUintFromFormat(vform) >> 1) + 1;
1006       }
1007       SetInt(vform, i, val);
1008     }
1009     return *this;
1010   }
1011 
1012   // Signed halve lanes of a vector, and use the carry state to set the top bit.
Halve(VectorFormat vform)1013   LogicVRegister& Halve(VectorFormat vform) {
1014     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1015       int64_t val = Int(vform, i);
1016       SetRounding(i, (val & 1) == 1);
1017       val = ExtractSignedBitfield64(63, 1, val);  // >>= 1
1018       if (GetSignedSaturation(i) == kNotSaturated) {
1019         SetInt(vform, i, val);
1020       } else {
1021         // If the operation causes signed saturation, the sign bit must be
1022         // inverted.
1023         uint64_t uval = static_cast<uint64_t>(val);
1024         SetUint(vform, i, uval ^ ((MaxUintFromFormat(vform) >> 1) + 1));
1025       }
1026     }
1027     return *this;
1028   }
1029 
LaneCountFromFormat(VectorFormat vform)1030   int LaneCountFromFormat(VectorFormat vform) const {
1031     if (IsSVEFormat(vform)) {
1032       return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform);
1033     } else {
1034       return vixl::aarch64::LaneCountFromFormat(vform);
1035     }
1036   }
1037 
1038  private:
1039   SimVRegister& register_;
1040 
1041   // Allocate one saturation state entry per lane; largest register is type Q,
1042   // and lanes can be a minimum of one byte wide.
1043   Saturation saturated_[kZRegMaxSizeInBytes];
1044 
1045   // Allocate one rounding state entry per lane.
1046   bool round_[kZRegMaxSizeInBytes];
1047 };
1048 
1049 // Represent an SVE addressing mode and abstract per-lane address generation to
1050 // make iteration easy.
1051 //
1052 // Contiguous accesses are described with a simple base address, the memory
1053 // occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in
1054 // each struct (`SetRegCount()`).
1055 //
1056 // Scatter-gather accesses also require a SimVRegister and information about how
1057 // to extract lanes from it.
1058 class LogicSVEAddressVector {
1059  public:
1060   // scalar-plus-scalar
1061   // scalar-plus-immediate
LogicSVEAddressVector(uint64_t base)1062   explicit LogicSVEAddressVector(uint64_t base)
1063       : base_(base),
1064         msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
1065         reg_count_(1),
1066         vector_(NULL),
1067         vector_form_(kFormatUndefined),
1068         vector_mod_(NO_SVE_OFFSET_MODIFIER),
1069         vector_shift_(0) {}
1070 
1071   // scalar-plus-vector
1072   // vector-plus-immediate
1073   //    `base` should be the constant used for each element. That is, the value
1074   //    of `xn`, or `#<imm>`.
1075   //    `vector` should be the SimVRegister with offsets for each element. The
1076   //    vector format must be specified; SVE scatter/gather accesses typically
1077   //    support both 32-bit and 64-bit addressing.
1078   //
1079   //    `mod` and `shift` correspond to the modifiers applied to each element in
1080   //    scalar-plus-vector forms, such as those used for unpacking and
1081   //    sign-extension. They are not used for vector-plus-immediate.
1082   LogicSVEAddressVector(uint64_t base,
1083                         const SimVRegister* vector,
1084                         VectorFormat vform,
1085                         SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER,
1086                         int shift = 0)
base_(base)1087       : base_(base),
1088         msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
1089         reg_count_(1),
1090         vector_(vector),
1091         vector_form_(vform),
1092         vector_mod_(mod),
1093         vector_shift_(shift) {}
1094 
1095   // Set `msize` -- the memory occupied by each lane -- for address
1096   // calculations.
SetMsizeInBytesLog2(int msize_in_bytes_log2)1097   void SetMsizeInBytesLog2(int msize_in_bytes_log2) {
1098     VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2));
1099     VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2));
1100     msize_in_bytes_log2_ = msize_in_bytes_log2;
1101   }
1102 
HasMsize()1103   bool HasMsize() const {
1104     return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2;
1105   }
1106 
GetMsizeInBytesLog2()1107   int GetMsizeInBytesLog2() const {
1108     VIXL_ASSERT(HasMsize());
1109     return msize_in_bytes_log2_;
1110   }
GetMsizeInBitsLog2()1111   int GetMsizeInBitsLog2() const {
1112     return GetMsizeInBytesLog2() + kBitsPerByteLog2;
1113   }
1114 
GetMsizeInBytes()1115   int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); }
GetMsizeInBits()1116   int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); }
1117 
SetRegCount(int reg_count)1118   void SetRegCount(int reg_count) {
1119     VIXL_ASSERT(reg_count >= 1);  // E.g. ld1/st1
1120     VIXL_ASSERT(reg_count <= 4);  // E.g. ld4/st4
1121     reg_count_ = reg_count;
1122   }
1123 
GetRegCount()1124   int GetRegCount() const { return reg_count_; }
1125 
1126   // Full per-element address calculation for structured accesses.
1127   //
1128   // Note that the register number argument (`reg`) is zero-based.
GetElementAddress(int lane,int reg)1129   uint64_t GetElementAddress(int lane, int reg) const {
1130     VIXL_ASSERT(reg < GetRegCount());
1131     // Individual structures are always contiguous in memory, so this
1132     // implementation works for both contiguous and scatter-gather addressing.
1133     return GetStructAddress(lane) + (reg * GetMsizeInBytes());
1134   }
1135 
1136   // Full per-struct address calculation for structured accesses.
1137   uint64_t GetStructAddress(int lane) const;
1138 
IsContiguous()1139   bool IsContiguous() const { return vector_ == NULL; }
IsScatterGather()1140   bool IsScatterGather() const { return !IsContiguous(); }
1141 
1142  private:
1143   uint64_t base_;
1144   int msize_in_bytes_log2_;
1145   int reg_count_;
1146 
1147   const SimVRegister* vector_;
1148   VectorFormat vector_form_;
1149   SVEOffsetModifier vector_mod_;
1150   int vector_shift_;
1151 
1152   static const int kUnknownMsizeInBytesLog2 = -1;
1153 };
1154 
1155 // The proper way to initialize a simulated system register (such as NZCV) is as
1156 // follows:
1157 //  SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
1158 class SimSystemRegister {
1159  public:
1160   // The default constructor represents a register which has no writable bits.
1161   // It is not possible to set its value to anything other than 0.
SimSystemRegister()1162   SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) {}
1163 
GetRawValue()1164   uint32_t GetRawValue() const { return value_; }
1165   VIXL_DEPRECATED("GetRawValue", uint32_t RawValue() const) {
1166     return GetRawValue();
1167   }
1168 
SetRawValue(uint32_t new_value)1169   void SetRawValue(uint32_t new_value) {
1170     value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
1171   }
1172 
ExtractBits(int msb,int lsb)1173   uint32_t ExtractBits(int msb, int lsb) const {
1174     return ExtractUnsignedBitfield32(msb, lsb, value_);
1175   }
1176   VIXL_DEPRECATED("ExtractBits", uint32_t Bits(int msb, int lsb) const) {
1177     return ExtractBits(msb, lsb);
1178   }
1179 
ExtractSignedBits(int msb,int lsb)1180   int32_t ExtractSignedBits(int msb, int lsb) const {
1181     return ExtractSignedBitfield32(msb, lsb, value_);
1182   }
1183   VIXL_DEPRECATED("ExtractSignedBits",
1184                   int32_t SignedBits(int msb, int lsb) const) {
1185     return ExtractSignedBits(msb, lsb);
1186   }
1187 
1188   void SetBits(int msb, int lsb, uint32_t bits);
1189 
1190   // Default system register values.
1191   static SimSystemRegister DefaultValueFor(SystemRegister id);
1192 
1193 #define DEFINE_GETTER(Name, HighBit, LowBit, Func)                            \
1194   uint32_t Get##Name() const { return this->Func(HighBit, LowBit); }          \
1195   VIXL_DEPRECATED("Get" #Name, uint32_t Name() const) { return Get##Name(); } \
1196   void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
1197 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
1198   static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
1199 
SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER,DEFINE_WRITE_IGNORE_MASK)1200   SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
1201 
1202 #undef DEFINE_ZERO_BITS
1203 #undef DEFINE_GETTER
1204 
1205  protected:
1206   // Most system registers only implement a few of the bits in the word. Other
1207   // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
1208   // describes the bits which are not modifiable.
1209   SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
1210       : value_(value), write_ignore_mask_(write_ignore_mask) {}
1211 
1212   uint32_t value_;
1213   uint32_t write_ignore_mask_;
1214 };
1215 
1216 
1217 class SimExclusiveLocalMonitor {
1218  public:
SimExclusiveLocalMonitor()1219   SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
1220     Clear();
1221   }
1222 
1223   // Clear the exclusive monitor (like clrex).
Clear()1224   void Clear() {
1225     address_ = 0;
1226     size_ = 0;
1227   }
1228 
1229   // Clear the exclusive monitor most of the time.
MaybeClear()1230   void MaybeClear() {
1231     if ((seed_ % kSkipClearProbability) != 0) {
1232       Clear();
1233     }
1234 
1235     // Advance seed_ using a simple linear congruential generator.
1236     seed_ = (seed_ * 48271) % 2147483647;
1237   }
1238 
1239   // Mark the address range for exclusive access (like load-exclusive).
MarkExclusive(uint64_t address,size_t size)1240   void MarkExclusive(uint64_t address, size_t size) {
1241     address_ = address;
1242     size_ = size;
1243   }
1244 
1245   // Return true if the address range is marked (like store-exclusive).
1246   // This helper doesn't implicitly clear the monitor.
IsExclusive(uint64_t address,size_t size)1247   bool IsExclusive(uint64_t address, size_t size) {
1248     VIXL_ASSERT(size > 0);
1249     // Be pedantic: Require both the address and the size to match.
1250     return (size == size_) && (address == address_);
1251   }
1252 
1253  private:
1254   uint64_t address_;
1255   size_t size_;
1256 
1257   const int kSkipClearProbability;
1258   uint32_t seed_;
1259 };
1260 
1261 
1262 // We can't accurate simulate the global monitor since it depends on external
1263 // influences. Instead, this implementation occasionally causes accesses to
1264 // fail, according to kPassProbability.
1265 class SimExclusiveGlobalMonitor {
1266  public:
SimExclusiveGlobalMonitor()1267   SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
1268 
IsExclusive(uint64_t address,size_t size)1269   bool IsExclusive(uint64_t address, size_t size) {
1270     USE(address, size);
1271 
1272     bool pass = (seed_ % kPassProbability) != 0;
1273     // Advance seed_ using a simple linear congruential generator.
1274     seed_ = (seed_ * 48271) % 2147483647;
1275     return pass;
1276   }
1277 
1278  private:
1279   const int kPassProbability;
1280   uint32_t seed_;
1281 };
1282 
1283 class Debugger;
1284 
1285 template <uint32_t mode>
1286 uint64_t CryptoOp(uint64_t x, uint64_t y, uint64_t z);
1287 
1288 class Simulator : public DecoderVisitor {
1289  public:
1290   explicit Simulator(Decoder* decoder,
1291                      FILE* stream = stdout,
1292                      SimStack::Allocated stack = SimStack().Allocate());
1293   ~Simulator();
1294 
1295   void ResetState();
1296 
1297   // Run the simulator.
1298   virtual void Run();
1299   void RunFrom(const Instruction* first);
1300 
1301 
1302 #if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
1303     (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
1304   // Templated `RunFrom` version taking care of passing arguments and returning
1305   // the result value.
1306   // This allows code like:
1307   //    int32_t res = simulator.RunFrom<int32_t, int32_t>(GenerateCode(),
1308   //                                                      0x123);
1309   // It requires VIXL's ABI features, and C++11 or greater.
1310   // Also, the initialisation of tuples is incorrect in GCC before 4.9.1:
1311   // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
1312   template <typename R, typename... P>
RunFrom(const Instruction * code,P...arguments)1313   R RunFrom(const Instruction* code, P... arguments) {
1314     return RunFromStructHelper<R, P...>::Wrapper(this, code, arguments...);
1315   }
1316 
1317   template <typename R, typename... P>
1318   struct RunFromStructHelper {
WrapperRunFromStructHelper1319     static R Wrapper(Simulator* simulator,
1320                      const Instruction* code,
1321                      P... arguments) {
1322       ABI abi;
1323       std::tuple<P...> unused_tuple{
1324           // TODO: We currently do not support arguments passed on the stack. We
1325           // could do so by using `WriteGenericOperand()` here, but may need to
1326           // add features to handle situations where the stack is or is not set
1327           // up.
1328           (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
1329                                            .GetCPURegister(),
1330                                        arguments),
1331            arguments)...};
1332       simulator->RunFrom(code);
1333       return simulator->ReadGenericOperand<R>(abi.GetReturnGenericOperand<R>());
1334     }
1335   };
1336 
1337   // Partial specialization when the return type is `void`.
1338   template <typename... P>
1339   struct RunFromStructHelper<void, P...> {
1340     static void Wrapper(Simulator* simulator,
1341                         const Instruction* code,
1342                         P... arguments) {
1343       ABI abi;
1344       std::tuple<P...> unused_tuple{
1345           // TODO: We currently do not support arguments passed on the stack. We
1346           // could do so by using `WriteGenericOperand()` here, but may need to
1347           // add features to handle situations where the stack is or is not set
1348           // up.
1349           (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
1350                                            .GetCPURegister(),
1351                                        arguments),
1352            arguments)...};
1353       simulator->RunFrom(code);
1354     }
1355   };
1356 #endif
1357 
1358   // Execution ends when the PC hits this address.
1359   static const Instruction* kEndOfSimAddress;
1360 
1361   // Simulation helpers.
1362   bool IsSimulationFinished() const { return pc_ == kEndOfSimAddress; }
1363 
1364   const Instruction* ReadPc() const { return pc_; }
1365   VIXL_DEPRECATED("ReadPc", const Instruction* pc() const) { return ReadPc(); }
1366 
1367   enum BranchLogMode { LogBranches, NoBranchLog };
1368 
1369   void WritePc(const Instruction* new_pc,
1370                BranchLogMode log_mode = LogBranches) {
1371     if (log_mode == LogBranches) LogTakenBranch(new_pc);
1372     pc_ = AddressUntag(new_pc);
1373     pc_modified_ = true;
1374   }
1375   VIXL_DEPRECATED("WritePc", void set_pc(const Instruction* new_pc)) {
1376     return WritePc(new_pc);
1377   }
1378 
1379   void IncrementPc() {
1380     if (!pc_modified_) {
1381       pc_ = pc_->GetNextInstruction();
1382     }
1383   }
1384   VIXL_DEPRECATED("IncrementPc", void increment_pc()) { IncrementPc(); }
1385 
1386   BType ReadBType() const { return btype_; }
1387   void WriteNextBType(BType btype) { next_btype_ = btype; }
1388   void UpdateBType() {
1389     btype_ = next_btype_;
1390     next_btype_ = DefaultBType;
1391   }
1392 
1393   // Helper function to determine BType for branches.
1394   BType GetBTypeFromInstruction(const Instruction* instr) const;
1395 
1396   bool PcIsInGuardedPage() const { return guard_pages_; }
1397   void SetGuardedPages(bool guard_pages) { guard_pages_ = guard_pages; }
1398 
1399   const Instruction* GetLastExecutedInstruction() const { return last_instr_; }
1400 
1401   void ExecuteInstruction() {
1402     // The program counter should always be aligned.
1403     VIXL_ASSERT(IsWordAligned(pc_));
1404     pc_modified_ = false;
1405 
1406     // On guarded pages, if BType is not zero, take an exception on any
1407     // instruction other than BTI, PACI[AB]SP, HLT or BRK.
1408     if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
1409       if (pc_->IsPAuth()) {
1410         Instr i = pc_->Mask(SystemPAuthMask);
1411         if ((i != PACIASP) && (i != PACIBSP)) {
1412           VIXL_ABORT_WITH_MSG(
1413               "Executing non-BTI instruction with wrong BType.");
1414         }
1415       } else if (!pc_->IsBti() && !pc_->IsException()) {
1416         VIXL_ABORT_WITH_MSG("Executing non-BTI instruction with wrong BType.");
1417       }
1418     }
1419 
1420     bool last_instr_was_movprfx =
1421         (form_hash_ == "movprfx_z_z"_h) || (form_hash_ == "movprfx_z_p_z"_h);
1422 
1423     // decoder_->Decode(...) triggers at least the following visitors:
1424     //  1. The CPUFeaturesAuditor (`cpu_features_auditor_`).
1425     //  2. The PrintDisassembler (`print_disasm_`), if enabled.
1426     //  3. The Simulator (`this`).
1427     // User can add additional visitors at any point, but the Simulator requires
1428     // that the ordering above is preserved.
1429     decoder_->Decode(pc_);
1430 
1431     if (last_instr_was_movprfx) {
1432       VIXL_ASSERT(last_instr_ != NULL);
1433       VIXL_CHECK(pc_->CanTakeSVEMovprfx(form_hash_, last_instr_));
1434     }
1435 
1436     last_instr_ = ReadPc();
1437     IncrementPc();
1438     LogAllWrittenRegisters();
1439     UpdateBType();
1440 
1441     VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable());
1442   }
1443 
1444   virtual void Visit(Metadata* metadata,
1445                      const Instruction* instr) VIXL_OVERRIDE;
1446 
1447 #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
1448   VISITOR_LIST_THAT_RETURN(DECLARE)
1449 #undef DECLARE
1450 #define DECLARE(A) \
1451   VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr);
1452   VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
1453 #undef DECLARE
1454 
1455   void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
1456   void Simulate_PdT_Xn_Xm(const Instruction* instr);
1457   void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
1458   void Simulate_ZdB_ZnB_ZmB(const Instruction* instr);
1459   void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr);
1460   void Simulate_ZdH_PgM_ZnS(const Instruction* instr);
1461   void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr);
1462   void Simulate_ZdS_PgM_ZnD(const Instruction* instr);
1463   void Simulate_ZdS_PgM_ZnS(const Instruction* instr);
1464   void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr);
1465   void Simulate_ZdT_PgM_ZnT(const Instruction* instr);
1466   void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
1467   void Simulate_ZdT_ZnT_ZmT(const Instruction* instr);
1468   void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr);
1469   void Simulate_ZdT_ZnT_const(const Instruction* instr);
1470   void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
1471   void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
1472   void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr);
1473   void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
1474   void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
1475   void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr);
1476   void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr);
1477   void Simulate_ZdaT_ZnT_const(const Instruction* instr);
1478   void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr);
1479   void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
1480   void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr);
1481   void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
1482   void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
1483   void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr);
1484   void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
1485   void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr);
1486 
1487   void SimulateSVEHalvingAddSub(const Instruction* instr);
1488   void SimulateSVESaturatingArithmetic(const Instruction* instr);
1489   void SimulateSVEIntArithPair(const Instruction* instr);
1490   void SimulateSVENarrow(const Instruction* instr);
1491   void SimulateSVEInterleavedArithLong(const Instruction* instr);
1492   void SimulateSVEShiftLeftImm(const Instruction* instr);
1493   void SimulateSVEAddSubCarry(const Instruction* instr);
1494   void SimulateSVEAddSubHigh(const Instruction* instr);
1495   void SimulateSVEIntMulLongVec(const Instruction* instr);
1496   void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr);
1497   void SimulateSVEExclusiveOrRotate(const Instruction* instr);
1498   void SimulateSVEBitwiseTernary(const Instruction* instr);
1499   void SimulateSVEComplexDotProduct(const Instruction* instr);
1500   void SimulateSVEMulIndex(const Instruction* instr);
1501   void SimulateSVEMlaMlsIndex(const Instruction* instr);
1502   void SimulateSVEComplexIntMulAdd(const Instruction* instr);
1503   void SimulateSVESaturatingMulAddHigh(const Instruction* instr);
1504   void SimulateSVESaturatingMulHighIndex(const Instruction* instr);
1505   void SimulateSVEFPConvertLong(const Instruction* instr);
1506   void SimulateSVEPmull128(const Instruction* instr);
1507   void SimulateMatrixMul(const Instruction* instr);
1508   void SimulateSVEFPMatrixMul(const Instruction* instr);
1509   void SimulateNEONMulByElementLong(const Instruction* instr);
1510   void SimulateNEONFPMulByElement(const Instruction* instr);
1511   void SimulateNEONFPMulByElementLong(const Instruction* instr);
1512   void SimulateNEONComplexMulByElement(const Instruction* instr);
1513   void SimulateNEONDotProdByElement(const Instruction* instr);
1514   void SimulateNEONSHA3(const Instruction* instr);
1515   void SimulateMTEAddSubTag(const Instruction* instr);
1516   void SimulateMTETagMaskInsert(const Instruction* instr);
1517   void SimulateMTESubPointer(const Instruction* instr);
1518   void SimulateMTELoadTag(const Instruction* instr);
1519   void SimulateMTEStoreTag(const Instruction* instr);
1520   void SimulateMTEStoreTagPair(const Instruction* instr);
1521   void Simulate_XdSP_XnSP_Xm(const Instruction* instr);
1522   void SimulateCpy(const Instruction* instr);
1523   void SimulateCpyFP(const Instruction* instr);
1524   void SimulateCpyP(const Instruction* instr);
1525   void SimulateCpyM(const Instruction* instr);
1526   void SimulateCpyE(const Instruction* instr);
1527   void SimulateSetP(const Instruction* instr);
1528   void SimulateSetM(const Instruction* instr);
1529   void SimulateSetE(const Instruction* instr);
1530   void SimulateSetGP(const Instruction* instr);
1531   void SimulateSetGM(const Instruction* instr);
1532   void SimulateSignedMinMax(const Instruction* instr);
1533   void SimulateUnsignedMinMax(const Instruction* instr);
1534   void SimulateSHA512(const Instruction* instr);
1535 
1536   void VisitCryptoSM3(const Instruction* instr);
1537   void VisitCryptoSM4(const Instruction* instr);
1538 
1539   // Integer register accessors.
1540 
1541   // Basic accessor: Read the register as the specified type.
1542   template <typename T>
1543   T ReadRegister(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
1544     VIXL_ASSERT(
1545         code < kNumberOfRegisters ||
1546         ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
1547     if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
1548       T result;
1549       memset(&result, 0, sizeof(result));
1550       return result;
1551     }
1552     if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) {
1553       code = 31;
1554     }
1555     return registers_[code].Get<T>();
1556   }
1557   template <typename T>
1558   VIXL_DEPRECATED("ReadRegister",
1559                   T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister)
1560                       const) {
1561     return ReadRegister<T>(code, r31mode);
1562   }
1563 
1564   // Common specialized accessors for the ReadRegister() template.
1565   int32_t ReadWRegister(unsigned code,
1566                         Reg31Mode r31mode = Reg31IsZeroRegister) const {
1567     return ReadRegister<int32_t>(code, r31mode);
1568   }
1569   VIXL_DEPRECATED("ReadWRegister",
1570                   int32_t wreg(unsigned code,
1571                                Reg31Mode r31mode = Reg31IsZeroRegister) const) {
1572     return ReadWRegister(code, r31mode);
1573   }
1574 
1575   int64_t ReadXRegister(unsigned code,
1576                         Reg31Mode r31mode = Reg31IsZeroRegister) const {
1577     return ReadRegister<int64_t>(code, r31mode);
1578   }
1579   VIXL_DEPRECATED("ReadXRegister",
1580                   int64_t xreg(unsigned code,
1581                                Reg31Mode r31mode = Reg31IsZeroRegister) const) {
1582     return ReadXRegister(code, r31mode);
1583   }
1584 
1585   SimPRegister& ReadPRegister(unsigned code) {
1586     VIXL_ASSERT(code < kNumberOfPRegisters);
1587     return pregisters_[code];
1588   }
1589 
1590   SimFFRRegister& ReadFFR() { return ffr_register_; }
1591 
1592   // As above, with parameterized size and return type. The value is
1593   // either zero-extended or truncated to fit, as required.
1594   template <typename T>
1595   T ReadRegister(unsigned size,
1596                  unsigned code,
1597                  Reg31Mode r31mode = Reg31IsZeroRegister) const {
1598     uint64_t raw;
1599     switch (size) {
1600       case kWRegSize:
1601         raw = ReadRegister<uint32_t>(code, r31mode);
1602         break;
1603       case kXRegSize:
1604         raw = ReadRegister<uint64_t>(code, r31mode);
1605         break;
1606       default:
1607         VIXL_UNREACHABLE();
1608         return 0;
1609     }
1610 
1611     T result;
1612     VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
1613     // Copy the result and truncate to fit. This assumes a little-endian host.
1614     memcpy(&result, &raw, sizeof(result));
1615     return result;
1616   }
1617   template <typename T>
1618   VIXL_DEPRECATED("ReadRegister",
1619                   T reg(unsigned size,
1620                         unsigned code,
1621                         Reg31Mode r31mode = Reg31IsZeroRegister) const) {
1622     return ReadRegister<T>(size, code, r31mode);
1623   }
1624 
1625   // Use int64_t by default if T is not specified.
1626   int64_t ReadRegister(unsigned size,
1627                        unsigned code,
1628                        Reg31Mode r31mode = Reg31IsZeroRegister) const {
1629     return ReadRegister<int64_t>(size, code, r31mode);
1630   }
1631   VIXL_DEPRECATED("ReadRegister",
1632                   int64_t reg(unsigned size,
1633                               unsigned code,
1634                               Reg31Mode r31mode = Reg31IsZeroRegister) const) {
1635     return ReadRegister(size, code, r31mode);
1636   }
1637 
1638   enum RegLogMode { LogRegWrites, NoRegLog };
1639 
1640   // Write 'value' into an integer register. The value is zero-extended. This
1641   // behaviour matches AArch64 register writes.
1642   //
1643   // SP may be specified in one of two ways:
1644   //  - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister)
1645   //  - (code == 31) && (r31mode == Reg31IsStackPointer)
1646   template <typename T>
1647   void WriteRegister(unsigned code,
1648                      T value,
1649                      RegLogMode log_mode = LogRegWrites,
1650                      Reg31Mode r31mode = Reg31IsZeroRegister) {
1651     if (sizeof(T) < kWRegSizeInBytes) {
1652       // We use a C-style cast on purpose here.
1653       // Since we do not have access to 'constepxr if', the casts in this `if`
1654       // must be valid even if we know the code will never be executed, in
1655       // particular when `T` is a pointer type.
1656       int64_t tmp_64bit = (int64_t)value;
1657       int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit);
1658       WriteRegister<int32_t>(code, tmp_32bit, log_mode, r31mode);
1659       return;
1660     }
1661 
1662     VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
1663                 (sizeof(T) == kXRegSizeInBytes));
1664     VIXL_ASSERT(
1665         (code < kNumberOfRegisters) ||
1666         ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
1667 
1668     if (code == 31) {
1669       if (r31mode == Reg31IsZeroRegister) {
1670         // Discard writes to the zero register.
1671         return;
1672       } else {
1673         code = kSPRegInternalCode;
1674       }
1675     }
1676 
1677     // registers_[31] is the stack pointer.
1678     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
1679     registers_[code % kNumberOfRegisters].Write(value);
1680 
1681     if (log_mode == LogRegWrites) {
1682       LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T)));
1683     }
1684   }
1685   template <typename T>
1686   VIXL_DEPRECATED("WriteRegister",
1687                   void set_reg(unsigned code,
1688                                T value,
1689                                RegLogMode log_mode = LogRegWrites,
1690                                Reg31Mode r31mode = Reg31IsZeroRegister)) {
1691     WriteRegister<T>(code, value, log_mode, r31mode);
1692   }
1693 
1694   // Common specialized accessors for the WriteRegister() template.
1695   void WriteWRegister(unsigned code,
1696                       int32_t value,
1697                       RegLogMode log_mode = LogRegWrites,
1698                       Reg31Mode r31mode = Reg31IsZeroRegister) {
1699     WriteRegister(code, value, log_mode, r31mode);
1700   }
1701   VIXL_DEPRECATED("WriteWRegister",
1702                   void set_wreg(unsigned code,
1703                                 int32_t value,
1704                                 RegLogMode log_mode = LogRegWrites,
1705                                 Reg31Mode r31mode = Reg31IsZeroRegister)) {
1706     WriteWRegister(code, value, log_mode, r31mode);
1707   }
1708 
1709   void WriteXRegister(unsigned code,
1710                       int64_t value,
1711                       RegLogMode log_mode = LogRegWrites,
1712                       Reg31Mode r31mode = Reg31IsZeroRegister) {
1713     WriteRegister(code, value, log_mode, r31mode);
1714   }
1715   VIXL_DEPRECATED("WriteXRegister",
1716                   void set_xreg(unsigned code,
1717                                 int64_t value,
1718                                 RegLogMode log_mode = LogRegWrites,
1719                                 Reg31Mode r31mode = Reg31IsZeroRegister)) {
1720     WriteXRegister(code, value, log_mode, r31mode);
1721   }
1722 
1723   // As above, with parameterized size and type. The value is either
1724   // zero-extended or truncated to fit, as required.
1725   template <typename T>
1726   void WriteRegister(unsigned size,
1727                      unsigned code,
1728                      T value,
1729                      RegLogMode log_mode = LogRegWrites,
1730                      Reg31Mode r31mode = Reg31IsZeroRegister) {
1731     // Zero-extend the input.
1732     uint64_t raw = 0;
1733     VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
1734     memcpy(&raw, &value, sizeof(value));
1735 
1736     // Write (and possibly truncate) the value.
1737     switch (size) {
1738       case kWRegSize:
1739         WriteRegister(code, static_cast<uint32_t>(raw), log_mode, r31mode);
1740         break;
1741       case kXRegSize:
1742         WriteRegister(code, raw, log_mode, r31mode);
1743         break;
1744       default:
1745         VIXL_UNREACHABLE();
1746         return;
1747     }
1748   }
1749   template <typename T>
1750   VIXL_DEPRECATED("WriteRegister",
1751                   void set_reg(unsigned size,
1752                                unsigned code,
1753                                T value,
1754                                RegLogMode log_mode = LogRegWrites,
1755                                Reg31Mode r31mode = Reg31IsZeroRegister)) {
1756     WriteRegister(size, code, value, log_mode, r31mode);
1757   }
1758 
1759   // Common specialized accessors for the WriteRegister() template.
1760 
1761   // Commonly-used special cases.
1762   template <typename T>
1763   void WriteLr(T value) {
1764     WriteRegister(kLinkRegCode, value);
1765   }
1766   template <typename T>
1767   VIXL_DEPRECATED("WriteLr", void set_lr(T value)) {
1768     WriteLr(value);
1769   }
1770 
1771   template <typename T>
1772   void WriteSp(T value) {
1773     WriteRegister(31, value, LogRegWrites, Reg31IsStackPointer);
1774   }
1775   template <typename T>
1776   VIXL_DEPRECATED("WriteSp", void set_sp(T value)) {
1777     WriteSp(value);
1778   }
1779 
1780   // Vector register accessors.
1781   // These are equivalent to the integer register accessors, but for vector
1782   // registers.
1783 
1784   // A structure for representing a 128-bit Q register.
1785   struct qreg_t {
1786     uint8_t val[kQRegSizeInBytes];  // Byte storage, one Q register wide.
1787   };
1788 
1789   // A structure for representing a SVE Z register.
1790   struct zreg_t {
1791     uint8_t val[kZRegMaxSizeInBytes];  // Byte storage, maximum Z size.
1792   };
1793 
1794   // Basic accessor: read the register as the specified type.
1795   template <typename T>
1796   T ReadVRegister(unsigned code) const {
1797     VIXL_STATIC_ASSERT(
1798         (sizeof(T) == kBRegSizeInBytes) || (sizeof(T) == kHRegSizeInBytes) ||
1799         (sizeof(T) == kSRegSizeInBytes) || (sizeof(T) == kDRegSizeInBytes) ||
1800         (sizeof(T) == kQRegSizeInBytes));
1801     VIXL_ASSERT(code < kNumberOfVRegisters);
1802 
1803     return vregisters_[code].Get<T>();
1804   }
1805   template <typename T>
1806   VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned code) const) {
1807     return ReadVRegister<T>(code);
1808   }
1809 
1810   // Common specialized accessors for the ReadVRegister() template.
1811   int8_t ReadBRegister(unsigned code) const {
1812     return ReadVRegister<int8_t>(code);
1813   }
1814   VIXL_DEPRECATED("ReadBRegister", int8_t breg(unsigned code) const) {
1815     return ReadBRegister(code);
1816   }
1817 
1818   vixl::internal::SimFloat16 ReadHRegister(unsigned code) const {
1819     return RawbitsToFloat16(ReadHRegisterBits(code));
1820   }
1821   VIXL_DEPRECATED("ReadHRegister", int16_t hreg(unsigned code) const) {
1822     return Float16ToRawbits(ReadHRegister(code));
1823   }
1824 
1825   uint16_t ReadHRegisterBits(unsigned code) const {
1826     return ReadVRegister<uint16_t>(code);
1827   }
1828 
1829   float ReadSRegister(unsigned code) const {
1830     return ReadVRegister<float>(code);
1831   }
1832   VIXL_DEPRECATED("ReadSRegister", float sreg(unsigned code) const) {
1833     return ReadSRegister(code);
1834   }
1835 
1836   uint32_t ReadSRegisterBits(unsigned code) const {
1837     return ReadVRegister<uint32_t>(code);
1838   }
1839   VIXL_DEPRECATED("ReadSRegisterBits",
1840                   uint32_t sreg_bits(unsigned code) const) {
1841     return ReadSRegisterBits(code);
1842   }
1843 
1844   double ReadDRegister(unsigned code) const {
1845     return ReadVRegister<double>(code);
1846   }
1847   VIXL_DEPRECATED("ReadDRegister", double dreg(unsigned code) const) {
1848     return ReadDRegister(code);
1849   }
1850 
1851   uint64_t ReadDRegisterBits(unsigned code) const {
1852     return ReadVRegister<uint64_t>(code);
1853   }
1854   VIXL_DEPRECATED("ReadDRegisterBits",
1855                   uint64_t dreg_bits(unsigned code) const) {
1856     return ReadDRegisterBits(code);
1857   }
1858 
1859   qreg_t ReadQRegister(unsigned code) const {
1860     return ReadVRegister<qreg_t>(code);
1861   }
1862   VIXL_DEPRECATED("ReadQRegister", qreg_t qreg(unsigned code) const) {
1863     return ReadQRegister(code);
1864   }
1865 
1866   // As above, with parameterized size and return type. The value is
1867   // either zero-extended or truncated to fit, as required.
1868   template <typename T>
1869   T ReadVRegister(unsigned size, unsigned code) const {
1870     uint64_t raw = 0;
1871     T result;
1872 
1873     switch (size) {
1874       case kSRegSize:
1875         raw = ReadVRegister<uint32_t>(code);
1876         break;
1877       case kDRegSize:
1878         raw = ReadVRegister<uint64_t>(code);
1879         break;
1880       default:
1881         VIXL_UNREACHABLE();
1882         break;
1883     }
1884 
1885     VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
1886     // Copy the result and truncate to fit. This assumes a little-endian host.
1887     memcpy(&result, &raw, sizeof(result));
1888     return result;
1889   }
1890   template <typename T>
1891   VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned size, unsigned code) const) {
1892     return ReadVRegister<T>(size, code);
1893   }
1894 
1895   SimVRegister& ReadVRegister(unsigned code) { return vregisters_[code]; }
1896   VIXL_DEPRECATED("ReadVRegister", SimVRegister& vreg(unsigned code)) {
1897     return ReadVRegister(code);
1898   }
1899 
1900   // Basic accessor: Write the specified value.
1901   template <typename T>
1902   void WriteVRegister(unsigned code,
1903                       T value,
1904                       RegLogMode log_mode = LogRegWrites) {
1905     VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
1906                        (sizeof(value) == kHRegSizeInBytes) ||
1907                        (sizeof(value) == kSRegSizeInBytes) ||
1908                        (sizeof(value) == kDRegSizeInBytes) ||
1909                        (sizeof(value) == kQRegSizeInBytes) ||
1910                        (sizeof(value) == kZRegMaxSizeInBytes));
1911     VIXL_ASSERT(code < kNumberOfVRegisters);
1912     vregisters_[code].Write(value);
1913 
1914     if (log_mode == LogRegWrites) {
1915       LogVRegister(code, GetPrintRegisterFormat(value));
1916     }
1917   }
1918   template <typename T>
1919   VIXL_DEPRECATED("WriteVRegister",
1920                   void set_vreg(unsigned code,
1921                                 T value,
1922                                 RegLogMode log_mode = LogRegWrites)) {
1923     WriteVRegister(code, value, log_mode);
1924   }
1925 
1926   // Common specialized accessors for the WriteVRegister() template.
1927   void WriteBRegister(unsigned code,
1928                       int8_t value,
1929                       RegLogMode log_mode = LogRegWrites) {
1930     WriteVRegister(code, value, log_mode);
1931   }
1932   VIXL_DEPRECATED("WriteBRegister",
1933                   void set_breg(unsigned code,
1934                                 int8_t value,
1935                                 RegLogMode log_mode = LogRegWrites)) {
1936     return WriteBRegister(code, value, log_mode);
1937   }
1938 
1939   void WriteHRegister(unsigned code,
1940                       vixl::internal::SimFloat16 value,
1941                       RegLogMode log_mode = LogRegWrites) {
1942     WriteVRegister(code, Float16ToRawbits(value), log_mode);
1943   }
1944 
1945   void WriteHRegister(unsigned code,
1946                       int16_t value,
1947                       RegLogMode log_mode = LogRegWrites) {
1948     WriteVRegister(code, value, log_mode);
1949   }
1950   VIXL_DEPRECATED("WriteHRegister",
1951                   void set_hreg(unsigned code,
1952                                 int16_t value,
1953                                 RegLogMode log_mode = LogRegWrites)) {
1954     return WriteHRegister(code, value, log_mode);
1955   }
1956 
1957   void WriteSRegister(unsigned code,
1958                       float value,
1959                       RegLogMode log_mode = LogRegWrites) {
1960     WriteVRegister(code, value, log_mode);
1961   }
1962   VIXL_DEPRECATED("WriteSRegister",
1963                   void set_sreg(unsigned code,
1964                                 float value,
1965                                 RegLogMode log_mode = LogRegWrites)) {
1966     WriteSRegister(code, value, log_mode);
1967   }
1968 
1969   void WriteSRegisterBits(unsigned code,
1970                           uint32_t value,
1971                           RegLogMode log_mode = LogRegWrites) {
1972     WriteVRegister(code, value, log_mode);
1973   }
1974   VIXL_DEPRECATED("WriteSRegisterBits",
1975                   void set_sreg_bits(unsigned code,
1976                                      uint32_t value,
1977                                      RegLogMode log_mode = LogRegWrites)) {
1978     WriteSRegisterBits(code, value, log_mode);
1979   }
1980 
1981   void WriteDRegister(unsigned code,
1982                       double value,
1983                       RegLogMode log_mode = LogRegWrites) {
1984     WriteVRegister(code, value, log_mode);
1985   }
1986   VIXL_DEPRECATED("WriteDRegister",
1987                   void set_dreg(unsigned code,
1988                                 double value,
1989                                 RegLogMode log_mode = LogRegWrites)) {
1990     WriteDRegister(code, value, log_mode);
1991   }
1992 
1993   void WriteDRegisterBits(unsigned code,
1994                           uint64_t value,
1995                           RegLogMode log_mode = LogRegWrites) {
1996     WriteVRegister(code, value, log_mode);
1997   }
1998   VIXL_DEPRECATED("WriteDRegisterBits",
1999                   void set_dreg_bits(unsigned code,
2000                                      uint64_t value,
2001                                      RegLogMode log_mode = LogRegWrites)) {
2002     WriteDRegisterBits(code, value, log_mode);
2003   }
2004 
2005   void WriteQRegister(unsigned code,
2006                       qreg_t value,
2007                       RegLogMode log_mode = LogRegWrites) {
2008     WriteVRegister(code, value, log_mode);
2009   }
2010   VIXL_DEPRECATED("WriteQRegister",
2011                   void set_qreg(unsigned code,
2012                                 qreg_t value,
2013                                 RegLogMode log_mode = LogRegWrites)) {
2014     WriteQRegister(code, value, log_mode);
2015   }
2016 
2017   void WriteZRegister(unsigned code,
2018                       zreg_t value,
2019                       RegLogMode log_mode = LogRegWrites) {
2020     WriteVRegister(code, value, log_mode);
2021   }
2022 
2023   template <typename T>
2024   T ReadRegister(Register reg) const {
2025     return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister);
2026   }
2027 
2028   template <typename T>
2029   void WriteRegister(Register reg,
2030                      T value,
2031                      RegLogMode log_mode = LogRegWrites) {
2032     WriteRegister<T>(reg.GetCode(), value, log_mode, Reg31IsZeroRegister);
2033   }
2034 
2035   template <typename T>
2036   T ReadVRegister(VRegister vreg) const {
2037     return ReadVRegister<T>(vreg.GetCode());
2038   }
2039 
2040   template <typename T>
2041   void WriteVRegister(VRegister vreg,
2042                       T value,
2043                       RegLogMode log_mode = LogRegWrites) {
2044     WriteVRegister<T>(vreg.GetCode(), value, log_mode);
2045   }
2046 
2047   template <typename T>
2048   T ReadCPURegister(CPURegister reg) const {
2049     if (reg.IsVRegister()) {
2050       return ReadVRegister<T>(VRegister(reg));
2051     } else {
2052       return ReadRegister<T>(Register(reg));
2053     }
2054   }
2055 
2056   template <typename T>
2057   void WriteCPURegister(CPURegister reg,
2058                         T value,
2059                         RegLogMode log_mode = LogRegWrites) {
2060     if (reg.IsVRegister()) {
2061       WriteVRegister<T>(VRegister(reg), value, log_mode);
2062     } else {
2063       WriteRegister<T>(Register(reg), value, log_mode);
2064     }
2065   }
2066 
2067   template <typename T, typename A>
2068   std::optional<T> MemRead(A address) const {
2069     Instruction const* pc = ReadPc();
2070     return memory_.Read<T>(address, pc);
2071   }
2072 
2073   template <typename T, typename A>
2074   bool MemWrite(A address, T value) const {
2075     Instruction const* pc = ReadPc();
2076     return memory_.Write(address, value, pc);
2077   }
2078 
2079   template <typename A>
2080   std::optional<uint64_t> MemReadUint(int size_in_bytes, A address) const {
2081     return memory_.ReadUint(size_in_bytes, address);
2082   }
2083 
2084   template <typename A>
2085   std::optional<int64_t> MemReadInt(int size_in_bytes, A address) const {
2086     return memory_.ReadInt(size_in_bytes, address);
2087   }
2088 
2089   template <typename A>
2090   bool MemWrite(int size_in_bytes, A address, uint64_t value) const {
2091     return memory_.Write(size_in_bytes, address, value);
2092   }
2093 
2094   bool LoadLane(LogicVRegister dst,
2095                 VectorFormat vform,
2096                 int index,
2097                 uint64_t addr) const {
2098     unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
2099     return LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
2100   }
2101 
2102   bool LoadUintToLane(LogicVRegister dst,
2103                       VectorFormat vform,
2104                       unsigned msize_in_bytes,
2105                       int index,
2106                       uint64_t addr) const {
2107     VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadUint(msize_in_bytes, addr));
2108     dst.SetUint(vform, index, value);
2109     return true;
2110   }
2111 
2112   bool LoadIntToLane(LogicVRegister dst,
2113                      VectorFormat vform,
2114                      unsigned msize_in_bytes,
2115                      int index,
2116                      uint64_t addr) const {
2117     VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadInt(msize_in_bytes, addr));
2118     dst.SetInt(vform, index, value);
2119     return true;
2120   }
2121 
2122   bool StoreLane(const LogicVRegister& src,
2123                  VectorFormat vform,
2124                  int index,
2125                  uint64_t addr) const {
2126     unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
2127     return MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
2128   }
2129 
2130   uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
2131 
2132   template <typename T>
2133   T ReadGenericOperand(GenericOperand operand) const {
2134     if (operand.IsCPURegister()) {
2135       return ReadCPURegister<T>(operand.GetCPURegister());
2136     } else {
2137       VIXL_ASSERT(operand.IsMemOperand());
2138       auto res = MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
2139       VIXL_ASSERT(res);
2140       return *res;
2141     }
2142   }
2143 
2144   template <typename T>
2145   bool WriteGenericOperand(GenericOperand operand,
2146                            T value,
2147                            RegLogMode log_mode = LogRegWrites) {
2148     if (operand.IsCPURegister()) {
2149       // Outside SIMD, registers are 64-bit or a subset of a 64-bit register. If
2150       // the width of the value to write is smaller than 64 bits, the unused
2151       // bits may contain unrelated values that the code following this write
2152       // needs to handle gracefully.
2153       // Here we fill the unused bits with a predefined pattern to catch issues
2154       // early.
2155       VIXL_ASSERT(operand.GetCPURegister().GetSizeInBits() <= 64);
2156       uint64_t raw = 0xdeadda1adeadda1a;
2157       memcpy(&raw, &value, sizeof(value));
2158       WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
2159     } else {
2160       VIXL_ASSERT(operand.IsMemOperand());
2161       return MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
2162     }
2163     return true;
2164   }
2165 
2166   bool ReadN() const { return nzcv_.GetN() != 0; }
2167   VIXL_DEPRECATED("ReadN", bool N() const) { return ReadN(); }
2168 
2169   bool ReadZ() const { return nzcv_.GetZ() != 0; }
2170   VIXL_DEPRECATED("ReadZ", bool Z() const) { return ReadZ(); }
2171 
2172   bool ReadC() const { return nzcv_.GetC() != 0; }
2173   VIXL_DEPRECATED("ReadC", bool C() const) { return ReadC(); }
2174 
2175   bool ReadV() const { return nzcv_.GetV() != 0; }
2176   VIXL_DEPRECATED("ReadV", bool V() const) { return ReadV(); }
2177 
2178   SimSystemRegister& ReadNzcv() { return nzcv_; }
2179   VIXL_DEPRECATED("ReadNzcv", SimSystemRegister& nzcv()) { return ReadNzcv(); }
2180 
2181   // TODO: Find a way to make the fpcr_ members return the proper types, so
2182   // these accessors are not necessary.
2183   FPRounding ReadRMode() const {
2184     return static_cast<FPRounding>(fpcr_.GetRMode());
2185   }
2186   VIXL_DEPRECATED("ReadRMode", FPRounding RMode()) { return ReadRMode(); }
2187 
2188   UseDefaultNaN ReadDN() const {
2189     return fpcr_.GetDN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN;
2190   }
2191 
2192   VIXL_DEPRECATED("ReadDN", bool DN()) {
2193     return ReadDN() == kUseDefaultNaN ? true : false;
2194   }
2195 
2196   SimSystemRegister& ReadFpcr() { return fpcr_; }
2197   VIXL_DEPRECATED("ReadFpcr", SimSystemRegister& fpcr()) { return ReadFpcr(); }
2198 
2199   // Specify relevant register formats for Print(V)Register and related helpers.
2200   enum PrintRegisterFormat {
2201     // The lane size, encoded as log2 of the lane size in bytes.
2202     kPrintRegLaneSizeB = 0 << 0,
2203     kPrintRegLaneSizeH = 1 << 0,
2204     kPrintRegLaneSizeS = 2 << 0,
2205     kPrintRegLaneSizeW = kPrintRegLaneSizeS,
2206     kPrintRegLaneSizeD = 3 << 0,
2207     kPrintRegLaneSizeX = kPrintRegLaneSizeD,
2208     kPrintRegLaneSizeQ = 4 << 0,
2209     kPrintRegLaneSizeUnknown = 5 << 0,
2210 
2211     kPrintRegLaneSizeOffset = 0,
2212     kPrintRegLaneSizeMask = 7 << 0,  // Bits 0-2.
2213 
2214     // The overall register size (bits 3-4).
2215     kPrintRegAsScalar = 0,
2216     kPrintRegAsDVector = 1 << 3,
2217     kPrintRegAsQVector = 2 << 3,
2218     kPrintRegAsSVEVector = 3 << 3,
2219 
2220     kPrintRegAsVectorMask = 3 << 3,
2221 
2222     // Indicate floating-point format lanes. (This flag is only supported for
2223     // S-, H-, and D-sized lanes.)
2224     kPrintRegAsFP = 1 << 5,
2225 
2226     // With this flag, print helpers won't check that the upper bits are zero.
2227     // This also forces the register name to be printed with the `reg<msb:0>`
2228     // format.
2229     //
2230     // The flag is supported with any PrintRegisterFormat other than those with
2231     // kPrintRegAsSVEVector.
2232     kPrintRegPartial = 1 << 6,
2233 
2234 // Supported combinations.
2235 // These exist so that they can be referred to by name, but also because C++
2236 // does not allow enum types to hold values that aren't explicitly
2237 // enumerated, and we want to be able to combine the above flags.
2238 
2239 // Scalar formats. Each macro use declares the named enumerators below.
2240 #define VIXL_DECL_PRINT_REG_SCALAR(size)                           \
2241   kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \
2242   kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial
2243 #define VIXL_DECL_PRINT_REG_SCALAR_FP(size)                  \
2244   VIXL_DECL_PRINT_REG_SCALAR(size)                           \
2245   , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \
2246     kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP
2247     VIXL_DECL_PRINT_REG_SCALAR(W),
2248     VIXL_DECL_PRINT_REG_SCALAR(X),
2249     VIXL_DECL_PRINT_REG_SCALAR_FP(H),
2250     VIXL_DECL_PRINT_REG_SCALAR_FP(S),
2251     VIXL_DECL_PRINT_REG_SCALAR_FP(D),
2252     VIXL_DECL_PRINT_REG_SCALAR(Q),
2253 #undef VIXL_DECL_PRINT_REG_SCALAR
2254 #undef VIXL_DECL_PRINT_REG_SCALAR_FP
2255 
2256 #define VIXL_DECL_PRINT_REG_NEON(count, type, size)                     \
2257   kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \
2258   kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial
2259 #define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size)                   \
2260   VIXL_DECL_PRINT_REG_NEON(count, type, size)                            \
2261   , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \
2262     kPrintReg##count##type##PartialFP =                                  \
2263         kPrintReg##count##type##Partial | kPrintRegAsFP
2264     VIXL_DECL_PRINT_REG_NEON(1, B, Scalar),
2265     VIXL_DECL_PRINT_REG_NEON(8, B, DVector),
2266     VIXL_DECL_PRINT_REG_NEON(16, B, QVector),
2267     VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar),
2268     VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector),
2269     VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector),
2270     VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar),
2271     VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector),
2272     VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector),
2273     VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar),
2274     VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector),
2275     VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar),
2276 #undef VIXL_DECL_PRINT_REG_NEON
2277 #undef VIXL_DECL_PRINT_REG_NEON_FP
2278 
2279 #define VIXL_DECL_PRINT_REG_SVE(type)                                 \
2280   kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \
2281   kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial
2282 #define VIXL_DECL_PRINT_REG_SVE_FP(type)                       \
2283   VIXL_DECL_PRINT_REG_SVE(type)                                \
2284   , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \
2285     kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP
2286     VIXL_DECL_PRINT_REG_SVE(B),
2287     VIXL_DECL_PRINT_REG_SVE_FP(H),
2288     VIXL_DECL_PRINT_REG_SVE_FP(S),
2289     VIXL_DECL_PRINT_REG_SVE_FP(D),
2290     VIXL_DECL_PRINT_REG_SVE(Q)
2291 #undef VIXL_DECL_PRINT_REG_SVE
2292 #undef VIXL_DECL_PRINT_REG_SVE_FP
2293   };
2294 
2295   // Return `format` with the kPrintRegPartial flag set.
2296   PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) {
2297     // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the
2298     // result of this cast will always be well-defined.
2299     return static_cast<PrintRegisterFormat>(format | kPrintRegPartial);
2300   }
2301 
2302   // For SVE formats, return the format of a Q register part of it.
2303   PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) {
2304     VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
2305     // Keep the FP and lane size fields.
2306     int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP);
2307     // The resulting format must always be partial, because we're not formatting
2308     // the whole Z register.
2309     q_format |= (kPrintRegAsQVector | kPrintRegPartial);
2310 
2311     // This cast is always safe because NEON QVector formats support every
2312     // combination of FP and lane size that SVE formats do.
2313     return static_cast<PrintRegisterFormat>(q_format);
2314   }
2315 
2316   unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
2317     VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown);
2318     return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
2319   }
2320 
2321   unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
2322     return 1 << GetPrintRegLaneSizeInBytesLog2(format);
2323   }
2324 
2325   unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
2326     switch (format & kPrintRegAsVectorMask) {
2327       case kPrintRegAsScalar:
2328         return GetPrintRegLaneSizeInBytesLog2(format);
2329       case kPrintRegAsDVector:
2330         return kDRegSizeInBytesLog2;
2331       case kPrintRegAsQVector:
2332         return kQRegSizeInBytesLog2;
2333       default:
2334       case kPrintRegAsSVEVector:
2335         // We print SVE vectors in Q-sized chunks. These need special handling,
2336         // and it's probably an error to call this function in that case.
2337         VIXL_UNREACHABLE();
2338         return kQRegSizeInBytesLog2;
2339     }
2340   }
2341 
2342   unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
2343     return 1 << GetPrintRegSizeInBytesLog2(format);
2344   }
2345 
2346   unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) {
2347     return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2;
2348   }
2349 
2350   unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) {
2351     return 1 << GetPrintRegSizeInBitsLog2(format);
2352   }
2353 
2354   const char* GetPartialRegSuffix(PrintRegisterFormat format) {
2355     switch (GetPrintRegSizeInBitsLog2(format)) {
2356       case kBRegSizeLog2:
2357         return "<7:0>";
2358       case kHRegSizeLog2:
2359         return "<15:0>";
2360       case kSRegSizeLog2:
2361         return "<31:0>";
2362       case kDRegSizeLog2:
2363         return "<63:0>";
2364       case kQRegSizeLog2:
2365         return "<127:0>";
2366     }
2367     VIXL_UNREACHABLE();
2368     return "<UNKNOWN>";
2369   }
2370 
2371   unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
2372     unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
2373     unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
2374     VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
2375     return 1 << (reg_size_log2 - lane_size_log2);
2376   }
2377 
2378   uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) {
2379     int print_as = format & kPrintRegAsVectorMask;
2380     if (print_as == kPrintRegAsScalar) return 1;
2381 
2382     // Vector formats, including SVE formats printed in Q-sized chunks.
2383     static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001};
2384     unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format);
2385     VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks));
2386     uint16_t mask = masks[size_in_bytes_log2];
2387 
2388     // Exclude lanes that aren't visible in D vectors.
2389     if (print_as == kPrintRegAsDVector) mask &= 0x00ff;
2390     return mask;
2391   }
2392 
  // Select the print format for a register of size `reg_size` whose lanes have
  // size `lane_size`. Only declared here.
  // NOTE(review): both sizes are presumably in bytes, like the argument of
  // GetPrintRegisterFormatForSizeFP - confirm against the definition.
  PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
                                                    unsigned lane_size);
2395 
2396   PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
2397     return GetPrintRegisterFormatForSize(size, size);
2398   }
2399 
2400   PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
2401     switch (size) {
2402       default:
2403         VIXL_UNREACHABLE();
2404         return kPrintDReg;
2405       case kDRegSizeInBytes:
2406         return kPrintDReg;
2407       case kSRegSizeInBytes:
2408         return kPrintSReg;
2409       case kHRegSizeInBytes:
2410         return kPrintHReg;
2411     }
2412   }
2413 
2414   PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
2415     if ((GetPrintRegLaneSizeInBytes(format) == kHRegSizeInBytes) ||
2416         (GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
2417         (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
2418       return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
2419     }
2420     return format;
2421   }
2422 
2423   PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) {
2424     return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size));
2425   }
2426 
2427   template <typename T>
2428   PrintRegisterFormat GetPrintRegisterFormat(T value) {
2429     return GetPrintRegisterFormatForSize(sizeof(value));
2430   }
2431 
2432   PrintRegisterFormat GetPrintRegisterFormat(double value) {
2433     VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
2434     return GetPrintRegisterFormatForSizeFP(sizeof(value));
2435   }
2436 
2437   PrintRegisterFormat GetPrintRegisterFormat(float value) {
2438     VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
2439     return GetPrintRegisterFormatForSizeFP(sizeof(value));
2440   }
2441 
2442   PrintRegisterFormat GetPrintRegisterFormat(Float16 value) {
2443     VIXL_STATIC_ASSERT(sizeof(Float16ToRawbits(value)) == kHRegSizeInBytes);
2444     return GetPrintRegisterFormatForSizeFP(sizeof(Float16ToRawbits(value)));
2445   }
2446 
  // Select a print format from a VectorFormat. The FP variant is the
  // counterpart for floating-point data. Both are only declared here.
  PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
  PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform);

  // Print all registers of the specified types.
  void PrintRegisters();
  void PrintVRegisters();
  void PrintZRegisters();
  void PrintSystemRegisters();

  // As above, but only print the registers that have been updated.
  // NOTE(review): how "updated" is tracked is not visible here; it is
  // presumably related to NotifyRegisterLogged() - confirm.
  void PrintWrittenRegisters();
  void PrintWrittenVRegisters();
  void PrintWrittenPRegisters();
2460 
  // As above, but respect LOG_REGS and LOG_VREGS.
2462   void LogWrittenRegisters() {
2463     if (ShouldTraceRegs()) PrintWrittenRegisters();
2464   }
2465   void LogWrittenVRegisters() {
2466     if (ShouldTraceVRegs()) PrintWrittenVRegisters();
2467   }
2468   void LogWrittenPRegisters() {
2469     if (ShouldTraceVRegs()) PrintWrittenPRegisters();
2470   }
2471   void LogAllWrittenRegisters() {
2472     LogWrittenRegisters();
2473     LogWrittenVRegisters();
2474     LogWrittenPRegisters();
2475   }
2476 
  // The amount of space to leave for a register name. This is used to keep the
  // values vertically aligned. The longest register name has the form
  // "z31<2047:1920>" (14 characters). The total overall value indentation must
  // also take into account the fixed formatting: "# {name}: 0x{value}".
  static const int kPrintRegisterNameFieldWidth = 14;

  // Print whole, individual register values.
  // - The format can be used to restrict how much of the register is printed,
  //   but such formats indicate that the unprinted high-order bits are zero and
  //   these helpers will assert that.
  // - If the format includes the kPrintRegAsFP flag then human-friendly FP
  //   value annotations will be printed.
  // - The suffix can be used to add annotations (such as memory access
  //   details), or to suppress the newline.
  // - `code` is the register code (number) of the register to print.
  void PrintRegister(int code,
                     PrintRegisterFormat format = kPrintXReg,
                     const char* suffix = "\n");
  void PrintVRegister(int code,
                      PrintRegisterFormat format = kPrintReg1Q,
                      const char* suffix = "\n");
  // PrintZRegister and PrintPRegister print over several lines, so they cannot
  // allow the suffix to be overridden. PrintFFR takes no register code.
  void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
  void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
  void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ);
  // Print a single Q-sized part of a Z register, or the corresponding two-byte
  // part of a P register. These print single lines, and therefore allow the
  // suffix to be overridden. The format must include the kPrintRegPartial flag.
  // `q_index` selects which Q-sized part to print.
  // NOTE(review): the default format is kPrintRegVnQ; whether the
  // implementation adds kPrintRegPartial itself is not visible here - confirm.
  void PrintPartialZRegister(int code,
                             int q_index,
                             PrintRegisterFormat format = kPrintRegVnQ,
                             const char* suffix = "\n");
  void PrintPartialPRegister(int code,
                             int q_index,
                             PrintRegisterFormat format = kPrintRegVnQ,
                             const char* suffix = "\n");
  // As above, but for an arbitrary predicate value `reg`, printed under the
  // given `name`.
  void PrintPartialPRegister(const char* name,
                             const SimPRegister& reg,
                             int q_index,
                             PrintRegisterFormat format = kPrintRegVnQ,
                             const char* suffix = "\n");
2518 
2519   // Like Print*Register (above), but respect trace parameters.
2520   void LogRegister(unsigned code, PrintRegisterFormat format) {
2521     if (ShouldTraceRegs()) PrintRegister(code, format);
2522   }
2523   void LogVRegister(unsigned code, PrintRegisterFormat format) {
2524     if (ShouldTraceVRegs()) PrintVRegister(code, format);
2525   }
2526   void LogZRegister(unsigned code, PrintRegisterFormat format) {
2527     if (ShouldTraceVRegs()) PrintZRegister(code, format);
2528   }
2529   void LogPRegister(unsigned code, PrintRegisterFormat format) {
2530     if (ShouldTraceVRegs()) PrintPRegister(code, format);
2531   }
2532   void LogFFR(PrintRegisterFormat format) {
2533     if (ShouldTraceVRegs()) PrintFFR(format);
2534   }
2535 
  // Other state updates, including system registers.
  // These print unconditionally; the Log* wrappers that follow check the trace
  // parameters first.
  // NOTE(review): for PrintGCS, `is_push` presumably distinguishes a push from
  // a pop of `entry` on the Guarded Control Stack at `addr` - confirm against
  // the definition.
  void PrintSystemRegister(SystemRegister id);
  void PrintTakenBranch(const Instruction* target);
  void PrintGCS(bool is_push, uint64_t addr, size_t entry);
2540   void LogSystemRegister(SystemRegister id) {
2541     if (ShouldTraceSysRegs()) PrintSystemRegister(id);
2542   }
2543   void LogTakenBranch(const Instruction* target) {
2544     if (ShouldTraceBranches()) PrintTakenBranch(target);
2545   }
2546   void LogGCS(bool is_push, uint64_t addr, size_t entry) {
2547     if (ShouldTraceSysRegs()) PrintGCS(is_push, addr, entry);
2548   }
2549 
  // Trace memory accesses.

  // Common, contiguous register accesses (such as for scalars).
  // The *Write variants automatically set kPrintRegPartial on the format.
  // `rt_code` is the code of the register being loaded or stored, and `address`
  // is the memory address being accessed.
  void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
  // NOTE(review): `access_size_in_bytes` is presumably the size of the memory
  // access before extension to the register size - confirm.
  void PrintExtendingRead(int rt_code,
                          PrintRegisterFormat format,
                          int access_size_in_bytes,
                          uintptr_t address);
  void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
  void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
  void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
2562   // Simple, unpredicated SVE accesses always access the whole vector, and never
2563   // know the lane type, so there's no need to accept a `format`.
2564   void PrintZRead(int rt_code, uintptr_t address) {
2565     vregisters_[rt_code].NotifyRegisterLogged();
2566     PrintZAccess(rt_code, "<-", address);
2567   }
2568   void PrintZWrite(int rt_code, uintptr_t address) {
2569     PrintZAccess(rt_code, "->", address);
2570   }
2571   void PrintPRead(int rt_code, uintptr_t address) {
2572     pregisters_[rt_code].NotifyRegisterLogged();
2573     PrintPAccess(rt_code, "<-", address);
2574   }
2575   void PrintPWrite(int rt_code, uintptr_t address) {
2576     PrintPAccess(rt_code, "->", address);
2577   }
2578   void PrintWriteU64(uint64_t x, uintptr_t address) {
2579     fprintf(stream_,
2580             "#      0x%016" PRIx64 " -> %s0x%016" PRIxPTR "%s\n",
2581             x,
2582             clr_memory_address,
2583             address,
2584             clr_normal);
2585   }
2586 
2587   // Like Print* (above), but respect GetTraceParameters().
2588   void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
2589     if (ShouldTraceRegs()) PrintRead(rt_code, format, address);
2590   }
2591   void LogExtendingRead(int rt_code,
2592                         PrintRegisterFormat format,
2593                         int access_size_in_bytes,
2594                         uintptr_t address) {
2595     if (ShouldTraceRegs()) {
2596       PrintExtendingRead(rt_code, format, access_size_in_bytes, address);
2597     }
2598   }
2599   void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
2600     if (ShouldTraceWrites()) PrintWrite(rt_code, format, address);
2601   }
2602   void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
2603     if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address);
2604   }
2605   void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
2606     if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address);
2607   }
2608   void LogZRead(int rt_code, uintptr_t address) {
2609     if (ShouldTraceVRegs()) PrintZRead(rt_code, address);
2610   }
2611   void LogZWrite(int rt_code, uintptr_t address) {
2612     if (ShouldTraceWrites()) PrintZWrite(rt_code, address);
2613   }
2614   void LogPRead(int rt_code, uintptr_t address) {
2615     if (ShouldTraceVRegs()) PrintPRead(rt_code, address);
2616   }
2617   void LogPWrite(int rt_code, uintptr_t address) {
2618     if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
2619   }
2620   void LogWriteU64(uint64_t x, uintptr_t address) {
2621     if (ShouldTraceWrites()) PrintWriteU64(x, address);
2622   }
2623   void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
2624     if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value);
2625   }
  // Helpers for the above, where the access operation is parameterised.
  // - For loads, set op = "<-".
  // - For stores, set op = "->".
  void PrintAccess(int rt_code,
                   PrintRegisterFormat format,
                   const char* op,
                   uintptr_t address);
  void PrintVAccess(int rt_code,
                    PrintRegisterFormat format,
                    const char* op,
                    uintptr_t address);
  // NOTE(review): presumably traces a single byte `value` transferred from
  // `src` to `dst` - confirm against the definition.
  void PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value);
  // Simple, unpredicated SVE accesses always access the whole vector, and never
  // know the lane type, so these don't accept a `format`.
  void PrintZAccess(int rt_code, const char* op, uintptr_t address);
  void PrintPAccess(int rt_code, const char* op, uintptr_t address);

  // Multiple-structure accesses.
  // `reg_count` is the number of registers involved, starting at `rt_code`.
  void PrintVStructAccess(int rt_code,
                          int reg_count,
                          PrintRegisterFormat format,
                          const char* op,
                          uintptr_t address);
  // Single-structure (single-lane) accesses.
  // `lane` selects the lane that is accessed in each register.
  void PrintVSingleStructAccess(int rt_code,
                                int reg_count,
                                int lane,
                                PrintRegisterFormat format,
                                const char* op,
                                uintptr_t address);
  // Replicating accesses.
  void PrintVReplicatingStructAccess(int rt_code,
                                     int reg_count,
                                     PrintRegisterFormat format,
                                     const char* op,
                                     uintptr_t address);

  // Multiple-structure accesses (Z register variant).
  // NOTE(review): `pg` is presumably the governing predicate and
  // `msize_in_bytes` the size of each element in memory - confirm against the
  // definition.
  void PrintZStructAccess(int rt_code,
                          int reg_count,
                          const LogicPRegister& pg,
                          PrintRegisterFormat format,
                          int msize_in_bytes,
                          const char* op,
                          const LogicSVEAddressVector& addr);

  // Register-printing helper for all structured accessors.
  //
  // All lanes (according to `format`) are printed, but lanes indicated by
  // `focus_mask` are of particular interest. Each bit corresponds to a byte in
  // the printed register, in a manner similar to SVE's predicates. Currently,
  // this is used to determine when to print human-readable FP annotations.
  void PrintVRegistersForStructuredAccess(int rt_code,
                                          int reg_count,
                                          uint16_t focus_mask,
                                          PrintRegisterFormat format);

  // As for the VRegister variant, but print partial Z register names.
  // `q_index` selects the Q-sized part of each Z register.
  void PrintZRegistersForStructuredAccess(int rt_code,
                                          int q_index,
                                          int reg_count,
                                          uint16_t focus_mask,
                                          PrintRegisterFormat format);
2689 
  // Print part of a memory access. This should be used for annotating
  // non-trivial accesses, such as structured or sign-extending loads. Call
  // Print*Register (or Print*RegistersForStructuredAccess), then
  // PrintPartialAccess for each contiguous access that makes up the
  // instruction.
  //
  //  access_mask:
  //      The lanes to be printed. Each bit corresponds to a byte in the printed
  //      register, in a manner similar to SVE's predicates, except that the
  //      lane size is not respected when interpreting access_mask: unaligned
  //      bits must be zeroed.
  //
  //      This function asserts that this mask is non-zero.
  //
  //  future_access_mask:
  //      The lanes to be printed by a future invocation. This must be specified
  //      because vertical lines are drawn for partial accesses that haven't yet
  //      been printed. The format is the same as for access_mask.
  //
  //      If a lane is active in both `access_mask` and `future_access_mask`,
  //      `access_mask` takes precedence.
  //
  //  struct_element_count:
  //      The number of elements in each structure. For non-structured accesses,
  //      set this to one. Along with lane_size_in_bytes, this is used to
  //      determine the size of each access, and to format the accessed value.
  //
  //  lane_size_in_bytes:
  //      The size, in bytes, of each lane. See struct_element_count.
  //
  //  op:
  //      For stores, use "->". For loads, use "<-".
  //
  //  address:
  //      The address of this partial access. (Not the base address of the whole
  //      instruction.) The traced value is read from this address (according to
  //      struct_element_count and lane_size_in_bytes) so it must be accessible,
  //      and when tracing stores, the store must have been executed before this
  //      function is called.
  //
  //  reg_size_in_bytes:
  //      The size of the register being accessed. This helper is usually used
  //      for V registers or Q-sized chunks of Z registers, so that is the
  //      default, but it is possible to use this to annotate X register
  //      accesses by specifying kXRegSizeInBytes.
  //
  // The return value is a future_access_mask suitable for the next iteration,
  // so that it is possible to execute this in a loop, until the mask is zero.
  // Note that access_mask must still be updated by the caller for each call.
  uint16_t PrintPartialAccess(uint16_t access_mask,
                              uint16_t future_access_mask,
                              int struct_element_count,
                              int lane_size_in_bytes,
                              const char* op,
                              uintptr_t address,
                              int reg_size_in_bytes = kQRegSizeInBytes);
2743 
  // Print an abstract register value. This works for all register types, and
  // can print parts of registers. This exists to ensure consistent formatting
  // of values.
  // `value` points to the bytes of the value and `value_size` gives their
  // count.
  // NOTE(review): `value` is presumably in little-endian (register) order -
  // confirm against the definition.
  void PrintRegisterValue(const uint8_t* value,
                          int value_size,
                          PrintRegisterFormat format);
2750   template <typename T>
2751   void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) {
2752     PrintRegisterValue(sim_register.GetBytes(),
2753                        std::min(sim_register.GetSizeInBytes(),
2754                                 kQRegSizeInBytes),
2755                        format);
2756   }
2757 
  // As above, but format as an SVE predicate value, using binary notation with
  // spaces between each bit so that they align with the Z register bytes that
  // they predicate.
  void PrintPRegisterValue(uint16_t value);

  // Print FP annotations for the lanes of `value` that are selected by
  // `lane_mask`, interpreted according to `format`.
  // NOTE(review): `lane_mask` presumably uses the one-bit-per-byte layout
  // produced by GetPrintRegLaneMask - confirm against the definition.
  void PrintRegisterValueFPAnnotations(const uint8_t* value,
                                       uint16_t lane_mask,
                                       PrintRegisterFormat format);
2766   template <typename T>
2767   void PrintRegisterValueFPAnnotations(const T& sim_register,
2768                                        uint16_t lane_mask,
2769                                        PrintRegisterFormat format) {
2770     PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format);
2771   }
2772   template <typename T>
2773   void PrintRegisterValueFPAnnotations(const T& sim_register,
2774                                        PrintRegisterFormat format) {
2775     PrintRegisterValueFPAnnotations(sim_register.GetBytes(),
2776                                     GetPrintRegLaneMask(format),
2777                                     format);
2778   }
2779 
  // Handlers that take the instruction being simulated. DoUnreachable never
  // returns.
  // NOTE(review): these presumably implement the simulator's pseudo
  // instructions (unreachable, trace, log) - confirm against the definitions.
  VIXL_NO_RETURN void DoUnreachable(const Instruction* instr);
  void DoTrace(const Instruction* instr);
  void DoLog(const Instruction* instr);

  // Return the printable name of the register with the given code. For W and X
  // registers, `mode` selects how code 31 is interpreted; the default is the
  // zero register.
  static const char* WRegNameForCode(unsigned code,
                                     Reg31Mode mode = Reg31IsZeroRegister);
  static const char* XRegNameForCode(unsigned code,
                                     Reg31Mode mode = Reg31IsZeroRegister);
  static const char* BRegNameForCode(unsigned code);
  static const char* HRegNameForCode(unsigned code);
  static const char* SRegNameForCode(unsigned code);
  static const char* DRegNameForCode(unsigned code);
  static const char* VRegNameForCode(unsigned code);
  static const char* ZRegNameForCode(unsigned code);
  static const char* PRegNameForCode(unsigned code);
2795 
2796   bool IsColouredTrace() const { return coloured_trace_; }
2797   VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) {
2798     return IsColouredTrace();
2799   }
2800 
  // Enable or disable coloured trace output. Only declared here.
  // NOTE(review): presumably also updates the clr_* colour strings used when
  // printing - confirm against the definition.
  void SetColouredTrace(bool value);
2802   VIXL_DEPRECATED("SetColouredTrace", void set_coloured_trace(bool value)) {
2803     SetColouredTrace(value);
2804   }
2805 
2806   // Values for traces parameters defined in simulator-constants-aarch64.h in
2807   // enum TraceParameters.
2808   int GetTraceParameters() const { return trace_parameters_; }
2809   VIXL_DEPRECATED("GetTraceParameters", int trace_parameters() const) {
2810     return GetTraceParameters();
2811   }
2812 
2813   bool ShouldTraceWrites() const {
2814     return (GetTraceParameters() & LOG_WRITE) != 0;
2815   }
2816   bool ShouldTraceRegs() const {
2817     return (GetTraceParameters() & LOG_REGS) != 0;
2818   }
2819   bool ShouldTraceVRegs() const {
2820     return (GetTraceParameters() & LOG_VREGS) != 0;
2821   }
2822   bool ShouldTraceSysRegs() const {
2823     return (GetTraceParameters() & LOG_SYSREGS) != 0;
2824   }
2825   bool ShouldTraceBranches() const {
2826     return (GetTraceParameters() & LOG_BRANCH) != 0;
2827   }
2828 
  // Replace the trace parameters with `parameters`, a combination of the LOG_*
  // flags tested by the ShouldTrace* helpers. Only declared here.
  void SetTraceParameters(int parameters);
2830   VIXL_DEPRECATED("SetTraceParameters",
2831                   void set_trace_parameters(int parameters)) {
2832     SetTraceParameters(parameters);
2833   }
2834 
2835   // Clear the simulated local monitor to force the next store-exclusive
2836   // instruction to fail.
2837   void ClearLocalMonitor() { local_monitor_.Clear(); }
2838 
2839   void SilenceExclusiveAccessWarning() {
2840     print_exclusive_access_warning_ = false;
2841   }
2842 
2843   void CheckIsValidUnalignedAtomicAccess(int rn,
2844                                          uint64_t address,
2845                                          unsigned access_size) {
2846     // Verify that the address is available to the host.
2847     VIXL_ASSERT(address == static_cast<uintptr_t>(address));
2848 
2849     if (GetCPUFeatures()->Has(CPUFeatures::kUSCAT)) {
2850       // Check that the access falls entirely within one atomic access granule.
2851       if (AlignDown(address, kAtomicAccessGranule) !=
2852           AlignDown(address + access_size - 1, kAtomicAccessGranule)) {
2853         VIXL_ALIGNMENT_EXCEPTION();
2854       }
2855     } else {
2856       // Check that the access is aligned.
2857       if (AlignDown(address, access_size) != address) {
2858         VIXL_ALIGNMENT_EXCEPTION();
2859       }
2860     }
2861 
2862     // The sp must be aligned to 16 bytes when it is accessed.
2863     if ((rn == kSpRegCode) && (AlignDown(address, 16) != address)) {
2864       VIXL_ALIGNMENT_EXCEPTION();
2865     }
2866   }
2867 
  // The kind of pointer that a pointer authentication helper operates on.
  enum PointerType { kDataPointer, kInstructionPointer };

  // A pointer authentication key, as passed to the PAC helpers.
  // NOTE(review): `high` and `low` are presumably the two 64-bit halves of a
  // 128-bit key, and `number` an identifier for the key - confirm.
  struct PACKey {
    uint64_t high;
    uint64_t low;
    int number;
  };
2875 
2876   // Current implementation is that all pointers are tagged.
2877   bool HasTBI(uint64_t ptr, PointerType type) {
2878     USE(ptr, type);
2879     return true;
2880   }
2881 
2882   // Current implementation uses 48-bit virtual addresses.
2883   int GetBottomPACBit(uint64_t ptr, int ttbr) {
2884     USE(ptr, ttbr);
2885     VIXL_ASSERT((ttbr == 0) || (ttbr == 1));
2886     return 48;
2887   }
2888 
2889   // The top PAC bit is 55 for the purposes of relative bit fields with TBI,
2890   // however bit 55 is the TTBR bit regardless of TBI so isn't part of the PAC
2891   // codes in pointers.
2892   int GetTopPACBit(uint64_t ptr, PointerType type) {
2893     return HasTBI(ptr, type) ? 55 : 63;
2894   }
2895 
  // Armv8.3 Pointer authentication helpers. All are only declared here.
  // NOTE(review): the descriptions below are inferred from the names and
  // signatures - confirm against the definitions.
  // - CalculatePACMask: presumably the mask of the PAC bits in `ptr`.
  // - ComputePAC: presumably computes the PAC for `data` from `context` and
  //   `key`.
  // - AuthPAC / AddPAC / StripPAC: presumably authenticate, insert or remove
  //   the PAC in `ptr`.
  uint64_t CalculatePACMask(uint64_t ptr, PointerType type, int ext_bit);
  uint64_t ComputePAC(uint64_t data, uint64_t context, PACKey key);
  uint64_t AuthPAC(uint64_t ptr,
                   uint64_t context,
                   PACKey key,
                   PointerType type);
  uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type);
  uint64_t StripPAC(uint64_t ptr, PointerType type);
  // `pac_fn` is a pointer to a member function with the same type as AddPAC.
  // `dst` and `src` are presumably register codes.
  void PACHelper(int dst,
                 int src,
                 PACKey key,
                 decltype(&Simulator::AddPAC) pac_fn);
2909 
2910   // Armv8.5 MTE helpers.
2911   uint64_t ChooseNonExcludedTag(uint64_t tag,
2912                                 uint64_t offset,
2913                                 uint64_t exclude = 0) {
2914     VIXL_ASSERT(IsUint4(tag) && IsUint4(offset) && IsUint16(exclude));
2915 
2916     if (exclude == 0xffff) {
2917       return 0;
2918     }
2919 
2920     if (offset == 0) {
2921       while ((exclude & (uint64_t{1} << tag)) != 0) {
2922         tag = (tag + 1) % 16;
2923       }
2924     }
2925 
2926     while (offset > 0) {
2927       offset--;
2928       tag = (tag + 1) % 16;
2929       while ((exclude & (uint64_t{1} << tag)) != 0) {
2930         tag = (tag + 1) % 16;
2931       }
2932     }
2933     return tag;
2934   }
2935 
2936   uint64_t GetAddressWithAllocationTag(uint64_t addr, uint64_t tag) {
2937     VIXL_ASSERT(IsUint4(tag));
2938     return (addr & ~(UINT64_C(0xf) << 56)) | (tag << 56);
2939   }
2940 
// Simulated mmap support is only provided when building for Linux. The
// VIXL_HAS_SIMULATED_MMAP macro is defined alongside the declarations.
#if __linux__
#define VIXL_HAS_SIMULATED_MMAP
  // Create or remove a mapping with memory protection. Memory attributes such
  // as MTE and BTI are represented by metadata in Simulator.
  // NOTE(review): the parameters of Mmap presumably mirror the POSIX mmap()
  // call - confirm against the definition.
  void* Mmap(
      void* address, size_t length, int prot, int flags, int fd, off_t offset);

  // NOTE(review): unlike POSIX munmap(), this also takes `prot`; its meaning
  // here is not visible - confirm against the definition.
  int Munmap(void* address, size_t length, int prot);
#endif
2950 
2951   // The common CPUFeatures interface with the set of available features.
2952 
2953   CPUFeatures* GetCPUFeatures() {
2954     return cpu_features_auditor_.GetCPUFeatures();
2955   }
2956 
2957   void SetCPUFeatures(const CPUFeatures& cpu_features) {
2958     cpu_features_auditor_.SetCPUFeatures(cpu_features);
2959   }
2960 
2961   // The set of features that the simulator has encountered.
2962   const CPUFeatures& GetSeenFeatures() {
2963     return cpu_features_auditor_.GetSeenFeatures();
2964   }
2965   void ResetSeenFeatures() { cpu_features_auditor_.ResetSeenFeatures(); }
2966 
2967 // Runtime call emulation support.
2968 // It requires VIXL's ABI features, and C++11 or greater.
2969 // Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect
2970 // in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
2971 #if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
2972     (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
2973 
2974 #define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
2975 
// The implementation of the runtime call helpers requires the functionality
// provided by `std::index_sequence`. It is only available from C++14, but
// we want runtime call simulation to work from C++11, so we emulate it if
// necessary. Either way, the rest of the class uses the aliases
// `local_index_sequence` and `__local_index_sequence_for`.
#if __cplusplus >= 201402L
  template <std::size_t... I>
  using local_index_sequence = std::index_sequence<I...>;
  template <typename... P>
  using __local_index_sequence_for = std::index_sequence_for<P...>;
#else
  // Emulate the behaviour of `std::index_sequence` and
  // `std::index_sequence_for`.
  // Naming follows the `std` names, prefixed with `emulated_`.
  template <size_t... I>
  struct emulated_index_sequence {};

  // A recursive template to create a sequence of indexes.
  // Each step prepends `N - 1` to the pack `I...` and recurses with `N - 1`.
  // The base case (for `N == 0`) is declared outside of the class scope, as
  // required by C++.
  template <std::size_t N, size_t... I>
  struct emulated_make_index_sequence_helper
      : emulated_make_index_sequence_helper<N - 1, N - 1, I...> {};

  // Entry point of the recursion: starts with an empty index pack.
  template <std::size_t N>
  struct emulated_make_index_sequence : emulated_make_index_sequence_helper<N> {
  };

  // The index sequence with one index per type in `P...`.
  template <typename... P>
  struct emulated_index_sequence_for
      : emulated_make_index_sequence<sizeof...(P)> {};

  template <std::size_t... I>
  using local_index_sequence = emulated_index_sequence<I...>;
  template <typename... P>
  using __local_index_sequence_for = emulated_index_sequence_for<P...>;
#endif
3012 
3013   // Expand the argument tuple and perform the call.
3014   template <typename R, typename... P, std::size_t... I>
3015   R DoRuntimeCall(R (*function)(P...),
3016                   std::tuple<P...> arguments,
3017                   local_index_sequence<I...>) {
3018     USE(arguments);
3019     return function(std::get<I>(arguments)...);
3020   }
3021 
3022   template <typename R, typename... P>
3023   void RuntimeCallNonVoid(R (*function)(P...)) {
3024     ABI abi;
3025     std::tuple<P...> argument_operands{
3026         ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
3027     R return_value = DoRuntimeCall(function,
3028                                    argument_operands,
3029                                    __local_index_sequence_for<P...>{});
3030     bool succeeded =
3031         WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
3032     USE(succeeded);
3033     VIXL_ASSERT(succeeded);
3034   }
3035 
3036   template <typename R, typename... P>
3037   void RuntimeCallVoid(R (*function)(P...)) {
3038     ABI abi;
3039     std::tuple<P...> argument_operands{
3040         ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
3041     DoRuntimeCall(function,
3042                   argument_operands,
3043                   __local_index_sequence_for<P...>{});
3044   }
3045 
3046   // We use `struct` for `void` return type specialisation.
3047   template <typename R, typename... P>
3048   struct RuntimeCallStructHelper {
3049     static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
3050       R (*function)(P...) = reinterpret_cast<R (*)(P...)>(function_pointer);
3051       simulator->RuntimeCallNonVoid(function);
3052     }
3053   };
3054 
3055   // Partial specialization when the return type is `void`.
3056   template <typename... P>
3057   struct RuntimeCallStructHelper<void, P...> {
3058     static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
3059       void (*function)(P...) =
3060           reinterpret_cast<void (*)(P...)>(function_pointer);
3061       simulator->RuntimeCallVoid(function);
3062     }
3063   };
3064 #endif
3065 
  // Configure the simulated value of 'VL', which is the size of a Z register.
  // Because a change of 'VL' cannot occur during a program's lifetime, this
  // function also resets the SVE registers.
3069   void SetVectorLengthInBits(unsigned vector_length);
3070 
  // Return the currently configured vector length (vector_length_), in bits.
  unsigned GetVectorLengthInBits() const { return vector_length_; }
3072   unsigned GetVectorLengthInBytes() const {
3073     VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0);
3074     return vector_length_ / kBitsPerByte;
3075   }
3076   unsigned GetPredicateLengthInBits() const {
3077     VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0);
3078     return GetVectorLengthInBits() / kZRegBitsPerPRegBit;
3079   }
3080   unsigned GetPredicateLengthInBytes() const {
3081     VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0);
3082     return GetVectorLengthInBytes() / kZRegBitsPerPRegBit;
3083   }
3084 
3085   unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const {
3086     if (IsSVEFormat(vform)) {
3087       return GetVectorLengthInBits();
3088     } else {
3089       return vixl::aarch64::RegisterSizeInBitsFromFormat(vform);
3090     }
3091   }
3092 
3093   unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const {
3094     unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform);
3095     VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
3096     return size_in_bits / kBitsPerByte;
3097   }
3098 
3099   int LaneCountFromFormat(VectorFormat vform) const {
3100     if (IsSVEFormat(vform)) {
3101       return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform);
3102     } else {
3103       return vixl::aarch64::LaneCountFromFormat(vform);
3104     }
3105   }
3106 
3107   bool IsFirstActive(VectorFormat vform,
3108                      const LogicPRegister& mask,
3109                      const LogicPRegister& bits) {
3110     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3111       if (mask.IsActive(vform, i)) {
3112         return bits.IsActive(vform, i);
3113       }
3114     }
3115     return false;
3116   }
3117 
3118   bool AreNoneActive(VectorFormat vform,
3119                      const LogicPRegister& mask,
3120                      const LogicPRegister& bits) {
3121     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3122       if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) {
3123         return false;
3124       }
3125     }
3126     return true;
3127   }
3128 
3129   bool IsLastActive(VectorFormat vform,
3130                     const LogicPRegister& mask,
3131                     const LogicPRegister& bits) {
3132     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3133       if (mask.IsActive(vform, i)) {
3134         return bits.IsActive(vform, i);
3135       }
3136     }
3137     return false;
3138   }
3139 
3140   void PredTest(VectorFormat vform,
3141                 const LogicPRegister& mask,
3142                 const LogicPRegister& bits) {
3143     ReadNzcv().SetN(IsFirstActive(vform, mask, bits));
3144     ReadNzcv().SetZ(AreNoneActive(vform, mask, bits));
3145     ReadNzcv().SetC(!IsLastActive(vform, mask, bits));
3146     ReadNzcv().SetV(0);
3147     LogSystemRegister(NZCV);
3148   }
3149 
  // Return a mutable reference to the simulator's pregister_all_true_ member
  // (presumably a predicate register with every bit set; confirm against its
  // initialisation).
  SimPRegister& GetPTrue() { return pregister_all_true_; }
3151 
3152   template <typename T>
3153   size_t CleanGranuleTag(T address, size_t length = kMTETagGranuleInBytes) {
3154     size_t count = 0;
3155     for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
3156       count +=
3157           meta_data_.CleanMTETag(reinterpret_cast<uintptr_t>(address) + offset);
3158     }
3159     size_t expected =
3160         length / kMTETagGranuleInBytes + (length % kMTETagGranuleInBytes != 0);
3161 
3162     // Give a warning when the memory region that is being unmapped isn't all
3163     // either MTE protected or not.
3164     if (count != expected) {
3165       std::stringstream sstream;
3166       sstream << std::hex
3167               << "MTE WARNING : the memory region being unmapped "
3168                  "starting at address 0x"
3169               << reinterpret_cast<uint64_t>(address)
3170               << "is not fully MTE protected.\n";
3171       VIXL_WARNING(sstream.str().c_str());
3172     }
3173     return count;
3174   }
3175 
3176   template <typename T>
3177   void SetGranuleTag(T address,
3178                      int tag,
3179                      size_t length = kMTETagGranuleInBytes) {
3180     for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
3181       meta_data_.SetMTETag((uintptr_t)(address) + offset, tag);
3182     }
3183   }
3184 
  // Return the result of meta_data_.GetMTETag() for `address`, converted to
  // int. `address` is passed through unchanged (no cast is applied here).
  template <typename T>
  int GetGranuleTag(T address) {
    return meta_data_.GetMTETag(address);
  }
3189 
3190   // Generate a random address tag, and any tags specified in the input are
3191   // excluded from the selection.
3192   uint64_t GenerateRandomTag(uint16_t exclude = 0);
3193 
  // Register a new BranchInterception object for 'function'. If 'function' is
  // later branched to (e.g. "bl function"), then 'callback' is called if one
  // was provided; otherwise a runtime call is performed on 'function'.
  //
  // For example: this can be used to always perform runtime calls on
  // non-AArch64 functions without using the macroassembler.
  //
  // 'function' is dereferenced before being passed on, so it must not be null.
  template <typename R, typename... P>
  void RegisterBranchInterception(R (*function)(P...),
                                  InterceptionCallback callback = nullptr) {
    meta_data_.RegisterBranchInterception(*function, callback);
  }
3205 
  // Return the current output stream in use by the simulator (the stream_
  // member). The stream is not owned by, or transferred to, the caller here.
  FILE* GetOutputStream() const { return stream_; }
3208 
  // Return the current value of the debugger_enabled_ flag.
  bool IsDebuggerEnabled() const { return debugger_enabled_; }
3210 
  // Set the debugger_enabled_ flag to `enabled`. Only the flag is updated
  // here; the debugger object itself is not touched.
  void SetDebuggerEnabled(bool enabled) { debugger_enabled_ = enabled; }
3212 
  // Return a non-owning pointer to the object managed by debugger_ (obtained
  // with get(), so ownership stays with the simulator).
  Debugger* GetDebugger() const { return debugger_.get(); }
3214 
3215 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
3216   // Returns true if the faulting instruction address (usually the program
3217   // counter or instruction pointer) comes from an internal VIXL memory access.
3218   // This can be used by signal handlers to check if a signal was raised from
3219   // the simulator (via TryMemoryAccess) before the actual
3220   // access occurs.
3221   bool IsSimulatedMemoryAccess(uintptr_t fault_pc) const {
3222     return (fault_pc ==
3223             reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory));
3224   }
3225 
3226   // Get the instruction address of the internal VIXL memory access continuation
3227   // label. Signal handlers can resume execution at this address to return to
3228   // TryMemoryAccess which will continue simulation.
3229   uintptr_t GetSignalReturnAddress() const {
3230     return reinterpret_cast<uintptr_t>(&_vixl_internal_AccessMemory_continue);
3231   }
3232 
3233   // Replace the fault address reported by the kernel with the actual faulting
3234   // address.
3235   //
3236   // This is required because TryMemoryAccess reads a section of
3237   // memory 1 byte at a time meaning the fault address reported may not be the
3238   // base address of memory being accessed.
3239   void ReplaceFaultAddress(siginfo_t* siginfo, void* context) {
3240 #ifdef __x86_64__
3241     // The base address being accessed is passed in as the first argument to
3242     // _vixl_internal_ReadMemory.
3243     ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
3244     siginfo->si_addr = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RDI]);
3245 #else
3246     USE(siginfo);
3247     USE(context);
3248 #endif  // __x86_64__
3249   }
3250 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
3251 
3252  protected:
3253   const char* clr_normal;
3254   const char* clr_flag_name;
3255   const char* clr_flag_value;
3256   const char* clr_reg_name;
3257   const char* clr_reg_value;
3258   const char* clr_vreg_name;
3259   const char* clr_vreg_value;
3260   const char* clr_preg_name;
3261   const char* clr_preg_value;
3262   const char* clr_memory_address;
3263   const char* clr_warning;
3264   const char* clr_warning_message;
3265   const char* clr_printf;
3266   const char* clr_branch_marker;
3267 
3268   // Simulation helpers ------------------------------------
3269 
3270   void ResetSystemRegisters();
3271   void ResetRegisters();
3272   void ResetVRegisters();
3273   void ResetPRegisters();
3274   void ResetFFR();
3275 
3276   bool ConditionPassed(Condition cond) {
3277     switch (cond) {
3278       case eq:
3279         return ReadZ();
3280       case ne:
3281         return !ReadZ();
3282       case hs:
3283         return ReadC();
3284       case lo:
3285         return !ReadC();
3286       case mi:
3287         return ReadN();
3288       case pl:
3289         return !ReadN();
3290       case vs:
3291         return ReadV();
3292       case vc:
3293         return !ReadV();
3294       case hi:
3295         return ReadC() && !ReadZ();
3296       case ls:
3297         return !(ReadC() && !ReadZ());
3298       case ge:
3299         return ReadN() == ReadV();
3300       case lt:
3301         return ReadN() != ReadV();
3302       case gt:
3303         return !ReadZ() && (ReadN() == ReadV());
3304       case le:
3305         return !(!ReadZ() && (ReadN() == ReadV()));
3306       case nv:
3307         VIXL_FALLTHROUGH();
3308       case al:
3309         return true;
3310       default:
3311         VIXL_UNREACHABLE();
3312         return false;
3313     }
3314   }
3315 
  // Overload taking a raw `Instr` value: converts `cond` to a `Condition` with
  // static_cast and evaluates it with the overload above.
  bool ConditionPassed(Instr cond) {
    return ConditionPassed(static_cast<Condition>(cond));
  }
3319 
  // Logical negation of ConditionPassed(cond).
  bool ConditionFailed(Condition cond) { return !ConditionPassed(cond); }
3321 
3322   void AddSubHelper(const Instruction* instr, int64_t op2);
3323   uint64_t AddWithCarry(unsigned reg_size,
3324                         bool set_flags,
3325                         uint64_t left,
3326                         uint64_t right,
3327                         int carry_in = 0);
3328   std::pair<uint64_t, uint8_t> AddWithCarry(unsigned reg_size,
3329                                             uint64_t left,
3330                                             uint64_t right,
3331                                             int carry_in);
3332   vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y);
3333   vixl_uint128_t Lsl128(vixl_uint128_t x, unsigned shift) const;
3334   vixl_uint128_t Eor128(vixl_uint128_t x, vixl_uint128_t y) const;
3335   vixl_uint128_t Mul64(uint64_t x, uint64_t y);
3336   vixl_uint128_t Neg128(vixl_uint128_t x);
3337   void LogicalHelper(const Instruction* instr, int64_t op2);
3338   void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
3339   void LoadStoreHelper(const Instruction* instr,
3340                        int64_t offset,
3341                        AddrMode addrmode);
3342   void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
3343   template <typename T>
3344   void CompareAndSwapHelper(const Instruction* instr);
3345   template <typename T>
3346   void CompareAndSwapPairHelper(const Instruction* instr);
3347   template <typename T>
3348   void AtomicMemorySimpleHelper(const Instruction* instr);
3349   template <typename T>
3350   void AtomicMemorySwapHelper(const Instruction* instr);
3351   template <typename T>
3352   void LoadAcquireRCpcHelper(const Instruction* instr);
3353   template <typename T1, typename T2>
3354   void LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr);
3355   template <typename T>
3356   void StoreReleaseUnscaledOffsetHelper(const Instruction* instr);
3357   uintptr_t AddressModeHelper(unsigned addr_reg,
3358                               int64_t offset,
3359                               AddrMode addrmode);
3360   void NEONLoadStoreMultiStructHelper(const Instruction* instr,
3361                                       AddrMode addr_mode);
3362   void NEONLoadStoreSingleStructHelper(const Instruction* instr,
3363                                        AddrMode addr_mode);
3364   template <uint32_t mops_type>
3365   void MOPSPHelper(const Instruction* instr) {
3366     VIXL_ASSERT(instr->IsConsistentMOPSTriplet<mops_type>());
3367 
3368     int d = instr->GetRd();
3369     int n = instr->GetRn();
3370     int s = instr->GetRs();
3371 
3372     // Aliased registers and xzr are disallowed for Xd and Xn.
3373     if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) {
3374       VisitUnallocated(instr);
3375     }
3376 
3377     // Additionally, Xs may not be xzr for cpy.
3378     if ((mops_type == "cpy"_h) && (s == 31)) {
3379       VisitUnallocated(instr);
3380     }
3381 
3382     // Bits 31 and 30 must be zero.
3383     if (instr->ExtractBits(31, 30) != 0) {
3384       VisitUnallocated(instr);
3385     }
3386 
3387     // Saturate copy count.
3388     uint64_t xn = ReadXRegister(n);
3389     int saturation_bits = (mops_type == "cpy"_h) ? 55 : 63;
3390     if ((xn >> saturation_bits) != 0) {
3391       xn = (UINT64_C(1) << saturation_bits) - 1;
3392       if (mops_type == "setg"_h) {
3393         // Align saturated value to granule.
3394         xn &= ~UINT64_C(kMTETagGranuleInBytes - 1);
3395       }
3396       WriteXRegister(n, xn);
3397     }
3398 
3399     ReadNzcv().SetN(0);
3400     ReadNzcv().SetZ(0);
3401     ReadNzcv().SetC(1);  // Indicates "option B" implementation.
3402     ReadNzcv().SetV(0);
3403   }
3404 
3405   int64_t ShiftOperand(unsigned reg_size,
3406                        uint64_t value,
3407                        Shift shift_type,
3408                        unsigned amount) const;
3409   int64_t ExtendValue(unsigned reg_width,
3410                       int64_t value,
3411                       Extend extend_type,
3412                       unsigned left_shift = 0) const;
3413   uint64_t PolynomialMult(uint64_t op1,
3414                           uint64_t op2,
3415                           int lane_size_in_bits) const;
3416   vixl_uint128_t PolynomialMult128(uint64_t op1,
3417                                    uint64_t op2,
3418                                    int lane_size_in_bits) const;
3419 
3420   bool ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
3421   bool ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
3422   bool ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
3423   bool ld1r(VectorFormat vform,
3424             VectorFormat unpack_vform,
3425             LogicVRegister dst,
3426             uint64_t addr,
3427             bool is_signed = false);
3428   bool ld2(VectorFormat vform,
3429            LogicVRegister dst1,
3430            LogicVRegister dst2,
3431            uint64_t addr);
3432   bool ld2(VectorFormat vform,
3433            LogicVRegister dst1,
3434            LogicVRegister dst2,
3435            int index,
3436            uint64_t addr);
3437   bool ld2r(VectorFormat vform,
3438             LogicVRegister dst1,
3439             LogicVRegister dst2,
3440             uint64_t addr);
3441   bool ld3(VectorFormat vform,
3442            LogicVRegister dst1,
3443            LogicVRegister dst2,
3444            LogicVRegister dst3,
3445            uint64_t addr);
3446   bool ld3(VectorFormat vform,
3447            LogicVRegister dst1,
3448            LogicVRegister dst2,
3449            LogicVRegister dst3,
3450            int index,
3451            uint64_t addr);
3452   bool ld3r(VectorFormat vform,
3453             LogicVRegister dst1,
3454             LogicVRegister dst2,
3455             LogicVRegister dst3,
3456             uint64_t addr);
3457   bool ld4(VectorFormat vform,
3458            LogicVRegister dst1,
3459            LogicVRegister dst2,
3460            LogicVRegister dst3,
3461            LogicVRegister dst4,
3462            uint64_t addr);
3463   bool ld4(VectorFormat vform,
3464            LogicVRegister dst1,
3465            LogicVRegister dst2,
3466            LogicVRegister dst3,
3467            LogicVRegister dst4,
3468            int index,
3469            uint64_t addr);
3470   bool ld4r(VectorFormat vform,
3471             LogicVRegister dst1,
3472             LogicVRegister dst2,
3473             LogicVRegister dst3,
3474             LogicVRegister dst4,
3475             uint64_t addr);
3476   bool st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
3477   bool st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
3478   bool st2(VectorFormat vform,
3479            LogicVRegister src,
3480            LogicVRegister src2,
3481            uint64_t addr);
3482   bool st2(VectorFormat vform,
3483            LogicVRegister src,
3484            LogicVRegister src2,
3485            int index,
3486            uint64_t addr);
3487   bool st3(VectorFormat vform,
3488            LogicVRegister src,
3489            LogicVRegister src2,
3490            LogicVRegister src3,
3491            uint64_t addr);
3492   bool st3(VectorFormat vform,
3493            LogicVRegister src,
3494            LogicVRegister src2,
3495            LogicVRegister src3,
3496            int index,
3497            uint64_t addr);
3498   bool st4(VectorFormat vform,
3499            LogicVRegister src,
3500            LogicVRegister src2,
3501            LogicVRegister src3,
3502            LogicVRegister src4,
3503            uint64_t addr);
3504   bool st4(VectorFormat vform,
3505            LogicVRegister src,
3506            LogicVRegister src2,
3507            LogicVRegister src3,
3508            LogicVRegister src4,
3509            int index,
3510            uint64_t addr);
3511   LogicVRegister cmp(VectorFormat vform,
3512                      LogicVRegister dst,
3513                      const LogicVRegister& src1,
3514                      const LogicVRegister& src2,
3515                      Condition cond);
3516   LogicVRegister cmp(VectorFormat vform,
3517                      LogicVRegister dst,
3518                      const LogicVRegister& src1,
3519                      int imm,
3520                      Condition cond);
3521   LogicVRegister cmptst(VectorFormat vform,
3522                         LogicVRegister dst,
3523                         const LogicVRegister& src1,
3524                         const LogicVRegister& src2);
3525   LogicVRegister add(VectorFormat vform,
3526                      LogicVRegister dst,
3527                      const LogicVRegister& src1,
3528                      const LogicVRegister& src2);
3529   // Add `value` to each lane of `src1`, treating `value` as unsigned for the
3530   // purposes of setting the saturation flags.
3531   LogicVRegister add_uint(VectorFormat vform,
3532                           LogicVRegister dst,
3533                           const LogicVRegister& src1,
3534                           uint64_t value);
3535   LogicVRegister addp(VectorFormat vform,
3536                       LogicVRegister dst,
3537                       const LogicVRegister& src1,
3538                       const LogicVRegister& src2);
3539   LogicPRegister brka(LogicPRegister pd,
3540                       const LogicPRegister& pg,
3541                       const LogicPRegister& pn);
3542   LogicPRegister brkb(LogicPRegister pd,
3543                       const LogicPRegister& pg,
3544                       const LogicPRegister& pn);
3545   LogicPRegister brkn(LogicPRegister pdm,
3546                       const LogicPRegister& pg,
3547                       const LogicPRegister& pn);
3548   LogicPRegister brkpa(LogicPRegister pd,
3549                        const LogicPRegister& pg,
3550                        const LogicPRegister& pn,
3551                        const LogicPRegister& pm);
3552   LogicPRegister brkpb(LogicPRegister pd,
3553                        const LogicPRegister& pg,
3554                        const LogicPRegister& pn,
3555                        const LogicPRegister& pm);
3556   // dst = srca + src1 * src2
3557   LogicVRegister mla(VectorFormat vform,
3558                      LogicVRegister dst,
3559                      const LogicVRegister& srca,
3560                      const LogicVRegister& src1,
3561                      const LogicVRegister& src2);
3562   // dst = srca - src1 * src2
3563   LogicVRegister mls(VectorFormat vform,
3564                      LogicVRegister dst,
3565                      const LogicVRegister& srca,
3566                      const LogicVRegister& src1,
3567                      const LogicVRegister& src2);
3568   LogicVRegister mul(VectorFormat vform,
3569                      LogicVRegister dst,
3570                      const LogicVRegister& src1,
3571                      const LogicVRegister& src2);
3572   LogicVRegister mul(VectorFormat vform,
3573                      LogicVRegister dst,
3574                      const LogicVRegister& src1,
3575                      const LogicVRegister& src2,
3576                      int index);
3577   LogicVRegister mla(VectorFormat vform,
3578                      LogicVRegister dst,
3579                      const LogicVRegister& src1,
3580                      const LogicVRegister& src2,
3581                      int index);
3582   LogicVRegister mls(VectorFormat vform,
3583                      LogicVRegister dst,
3584                      const LogicVRegister& src1,
3585                      const LogicVRegister& src2,
3586                      int index);
3587   LogicVRegister pmul(VectorFormat vform,
3588                       LogicVRegister dst,
3589                       const LogicVRegister& src1,
3590                       const LogicVRegister& src2);
3591   LogicVRegister sdiv(VectorFormat vform,
3592                       LogicVRegister dst,
3593                       const LogicVRegister& src1,
3594                       const LogicVRegister& src2);
3595   LogicVRegister udiv(VectorFormat vform,
3596                       LogicVRegister dst,
3597                       const LogicVRegister& src1,
3598                       const LogicVRegister& src2);
3599 
3600   typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
3601                                                    LogicVRegister dst,
3602                                                    const LogicVRegister& src1,
3603                                                    const LogicVRegister& src2,
3604                                                    int index);
3605   LogicVRegister fmul(VectorFormat vform,
3606                       LogicVRegister dst,
3607                       const LogicVRegister& src1,
3608                       const LogicVRegister& src2,
3609                       int index);
3610   LogicVRegister fmla(VectorFormat vform,
3611                       LogicVRegister dst,
3612                       const LogicVRegister& src1,
3613                       const LogicVRegister& src2,
3614                       int index);
3615   LogicVRegister fmlal(VectorFormat vform,
3616                        LogicVRegister dst,
3617                        const LogicVRegister& src1,
3618                        const LogicVRegister& src2,
3619                        int index);
3620   LogicVRegister fmlal2(VectorFormat vform,
3621                         LogicVRegister dst,
3622                         const LogicVRegister& src1,
3623                         const LogicVRegister& src2,
3624                         int index);
3625   LogicVRegister fmls(VectorFormat vform,
3626                       LogicVRegister dst,
3627                       const LogicVRegister& src1,
3628                       const LogicVRegister& src2,
3629                       int index);
3630   LogicVRegister fmlsl(VectorFormat vform,
3631                        LogicVRegister dst,
3632                        const LogicVRegister& src1,
3633                        const LogicVRegister& src2,
3634                        int index);
3635   LogicVRegister fmlsl2(VectorFormat vform,
3636                         LogicVRegister dst,
3637                         const LogicVRegister& src1,
3638                         const LogicVRegister& src2,
3639                         int index);
3640   LogicVRegister fmulx(VectorFormat vform,
3641                        LogicVRegister dst,
3642                        const LogicVRegister& src1,
3643                        const LogicVRegister& src2,
3644                        int index);
3645   LogicVRegister smulh(VectorFormat vform,
3646                        LogicVRegister dst,
3647                        const LogicVRegister& src1,
3648                        const LogicVRegister& src2);
3649   LogicVRegister umulh(VectorFormat vform,
3650                        LogicVRegister dst,
3651                        const LogicVRegister& src1,
3652                        const LogicVRegister& src2);
3653   LogicVRegister sqdmull(VectorFormat vform,
3654                          LogicVRegister dst,
3655                          const LogicVRegister& src1,
3656                          const LogicVRegister& src2,
3657                          int index);
3658   LogicVRegister sqdmlal(VectorFormat vform,
3659                          LogicVRegister dst,
3660                          const LogicVRegister& src1,
3661                          const LogicVRegister& src2,
3662                          int index);
3663   LogicVRegister sqdmlsl(VectorFormat vform,
3664                          LogicVRegister dst,
3665                          const LogicVRegister& src1,
3666                          const LogicVRegister& src2,
3667                          int index);
3668   LogicVRegister sqdmulh(VectorFormat vform,
3669                          LogicVRegister dst,
3670                          const LogicVRegister& src1,
3671                          const LogicVRegister& src2,
3672                          int index);
3673   LogicVRegister sqrdmulh(VectorFormat vform,
3674                           LogicVRegister dst,
3675                           const LogicVRegister& src1,
3676                           const LogicVRegister& src2,
3677                           int index);
3678   LogicVRegister sqrdmlah(VectorFormat vform,
3679                           LogicVRegister dst,
3680                           const LogicVRegister& src1,
3681                           const LogicVRegister& src2,
3682                           int index);
3683   LogicVRegister sqrdmlsh(VectorFormat vform,
3684                           LogicVRegister dst,
3685                           const LogicVRegister& src1,
3686                           const LogicVRegister& src2,
3687                           int index);
3688   LogicVRegister sub(VectorFormat vform,
3689                      LogicVRegister dst,
3690                      const LogicVRegister& src1,
3691                      const LogicVRegister& src2);
3692   // Subtract `value` from each lane of `src1`, treating `value` as unsigned for
3693   // the purposes of setting the saturation flags.
3694   LogicVRegister sub_uint(VectorFormat vform,
3695                           LogicVRegister dst,
3696                           const LogicVRegister& src1,
3697                           uint64_t value);
3698   LogicVRegister and_(VectorFormat vform,
3699                       LogicVRegister dst,
3700                       const LogicVRegister& src1,
3701                       const LogicVRegister& src2);
3702   LogicVRegister orr(VectorFormat vform,
3703                      LogicVRegister dst,
3704                      const LogicVRegister& src1,
3705                      const LogicVRegister& src2);
3706   LogicVRegister orn(VectorFormat vform,
3707                      LogicVRegister dst,
3708                      const LogicVRegister& src1,
3709                      const LogicVRegister& src2);
3710   LogicVRegister eor(VectorFormat vform,
3711                      LogicVRegister dst,
3712                      const LogicVRegister& src1,
3713                      const LogicVRegister& src2);
3714   LogicVRegister bic(VectorFormat vform,
3715                      LogicVRegister dst,
3716                      const LogicVRegister& src1,
3717                      const LogicVRegister& src2);
3718   LogicVRegister bic(VectorFormat vform,
3719                      LogicVRegister dst,
3720                      const LogicVRegister& src,
3721                      uint64_t imm);
3722   LogicVRegister bif(VectorFormat vform,
3723                      LogicVRegister dst,
3724                      const LogicVRegister& src1,
3725                      const LogicVRegister& src2);
3726   LogicVRegister bit(VectorFormat vform,
3727                      LogicVRegister dst,
3728                      const LogicVRegister& src1,
3729                      const LogicVRegister& src2);
3730   LogicVRegister bsl(VectorFormat vform,
3731                      LogicVRegister dst,
3732                      const LogicVRegister& src_mask,
3733                      const LogicVRegister& src1,
3734                      const LogicVRegister& src2);
3735   LogicVRegister cls(VectorFormat vform,
3736                      LogicVRegister dst,
3737                      const LogicVRegister& src);
3738   LogicVRegister clz(VectorFormat vform,
3739                      LogicVRegister dst,
3740                      const LogicVRegister& src);
3741   LogicVRegister cnot(VectorFormat vform,
3742                       LogicVRegister dst,
3743                       const LogicVRegister& src);
3744   LogicVRegister cnt(VectorFormat vform,
3745                      LogicVRegister dst,
3746                      const LogicVRegister& src);
3747   LogicVRegister not_(VectorFormat vform,
3748                       LogicVRegister dst,
3749                       const LogicVRegister& src);
3750   LogicVRegister rbit(VectorFormat vform,
3751                       LogicVRegister dst,
3752                       const LogicVRegister& src);
3753   LogicVRegister rev(VectorFormat vform,
3754                      LogicVRegister dst,
3755                      const LogicVRegister& src);
3756   LogicVRegister rev_byte(VectorFormat vform,
3757                           LogicVRegister dst,
3758                           const LogicVRegister& src,
3759                           int rev_size);
3760   LogicVRegister rev16(VectorFormat vform,
3761                        LogicVRegister dst,
3762                        const LogicVRegister& src);
3763   LogicVRegister rev32(VectorFormat vform,
3764                        LogicVRegister dst,
3765                        const LogicVRegister& src);
3766   LogicVRegister rev64(VectorFormat vform,
3767                        LogicVRegister dst,
3768                        const LogicVRegister& src);
// Pairwise add-long helpers. addlp is the general form, parameterised by the
// is_signed and do_accumulate flags; the four named variants take only
// (vform, dst, src).
// NOTE(review): saddlp/uaddlp/sadalp/uadalp presumably correspond to the four
// combinations of addlp's two flags -- confirm in the implementation file.
3769   LogicVRegister addlp(VectorFormat vform,
3770                        LogicVRegister dst,
3771                        const LogicVRegister& src,
3772                        bool is_signed,
3773                        bool do_accumulate);
3774   LogicVRegister saddlp(VectorFormat vform,
3775                         LogicVRegister dst,
3776                         const LogicVRegister& src);
3777   LogicVRegister uaddlp(VectorFormat vform,
3778                         LogicVRegister dst,
3779                         const LogicVRegister& src);
3780   LogicVRegister sadalp(VectorFormat vform,
3781                         LogicVRegister dst,
3782                         const LogicVRegister& src);
3783   LogicVRegister uadalp(VectorFormat vform,
3784                         LogicVRegister dst,
3785                         const LogicVRegister& src);
// Rotation and extraction helpers. ror and rol take an int rotation amount;
// ext combines two sources with an int index; rotate_elements_right takes a
// single source and an int index.
// NOTE(review): the units of `rotation` and `index` (bits, bytes or elements)
// are not visible from these declarations -- confirm before relying on them.
3786   LogicVRegister ror(VectorFormat vform,
3787                      LogicVRegister dst,
3788                      const LogicVRegister& src,
3789                      int rotation);
3790   LogicVRegister rol(VectorFormat vform,
3791                      LogicVRegister dst,
3792                      const LogicVRegister& src,
3793                      int rotation);
3794   LogicVRegister ext(VectorFormat vform,
3795                      LogicVRegister dst,
3796                      const LogicVRegister& src1,
3797                      const LogicVRegister& src2,
3798                      int index);
3799   LogicVRegister rotate_elements_right(VectorFormat vform,
3800                                        LogicVRegister dst,
3801                                        const LogicVRegister& src,
3802                                        int index);
// Floating-point complex-number helpers. Each operation is declared both as a
// template on T and as non-template overloads.
// NOTE(review): T is presumably the lane's floating-point type, chosen by the
// non-template overloads from vform -- confirm in the implementation file.
//
// fcadd: the template and non-template forms take the same arguments, ending
// in an int `rot`.
3803   template <typename T>
3804   LogicVRegister fcadd(VectorFormat vform,
3805                        LogicVRegister dst,
3806                        const LogicVRegister& src1,
3807                        const LogicVRegister& src2,
3808                        int rot);
3809   LogicVRegister fcadd(VectorFormat vform,
3810                        LogicVRegister dst,
3811                        const LogicVRegister& src1,
3812                        const LogicVRegister& src2,
3813                        int rot);
// fcmla: the overloads differ in their trailing arguments. The template takes
// acc, index and rot; the non-template forms take either (index, rot) with no
// acc, or (acc, rot) with no index.
3814   template <typename T>
3815   LogicVRegister fcmla(VectorFormat vform,
3816                        LogicVRegister dst,
3817                        const LogicVRegister& src1,
3818                        const LogicVRegister& src2,
3819                        const LogicVRegister& acc,
3820                        int index,
3821                        int rot);
3822   LogicVRegister fcmla(VectorFormat vform,
3823                        LogicVRegister dst,
3824                        const LogicVRegister& src1,
3825                        const LogicVRegister& src2,
3826                        int index,
3827                        int rot);
3828   LogicVRegister fcmla(VectorFormat vform,
3829                        LogicVRegister dst,
3830                        const LogicVRegister& src1,
3831                        const LogicVRegister& src2,
3832                        const LogicVRegister& acc,
3833                        int rot);
// Predicated floating-point accumulate. Both forms take the accumulator by
// value as `acc`, a predicate `pg` and one source vector, and return a
// LogicVRegister.
// NOTE(review): presumably models the strictly-ordered FADDA reduction over
// active lanes -- confirm in the implementation file.
3834   template <typename T>
3835   LogicVRegister fadda(VectorFormat vform,
3836                        LogicVRegister acc,
3837                        const LogicPRegister& pg,
3838                        const LogicVRegister& src);
3839   LogicVRegister fadda(VectorFormat vform,
3840                        LogicVRegister acc,
3841                        const LogicPRegister& pg,
3842                        const LogicVRegister& src);
// Integer complex-number helpers. cadd takes an int `rot` and an optional
// `saturate` flag that defaults to false. cmla has a form without and a form
// with an int `index`; both take an extra source named `srca` ahead of src1
// and src2.
// NOTE(review): srca is presumably the accumulator input -- confirm.
3843   LogicVRegister cadd(VectorFormat vform,
3844                       LogicVRegister dst,
3845                       const LogicVRegister& src1,
3846                       const LogicVRegister& src2,
3847                       int rot,
3848                       bool saturate = false);
3849   LogicVRegister cmla(VectorFormat vform,
3850                       LogicVRegister dst,
3851                       const LogicVRegister& srca,
3852                       const LogicVRegister& src1,
3853                       const LogicVRegister& src2,
3854                       int rot);
3855   LogicVRegister cmla(VectorFormat vform,
3856                       LogicVRegister dst,
3857                       const LogicVRegister& srca,
3858                       const LogicVRegister& src1,
3859                       const LogicVRegister& src2,
3860                       int index,
3861                       int rot);
// Two-source bit helpers. bgrp has an optional do_bext flag (default false);
// bdep has no flag.
// NOTE(review): the flag suggests BEXT is implemented as a mode of bgrp --
// confirm in the implementation file.
3862   LogicVRegister bgrp(VectorFormat vform,
3863                       LogicVRegister dst,
3864                       const LogicVRegister& src1,
3865                       const LogicVRegister& src2,
3866                       bool do_bext = false);
3867   LogicVRegister bdep(VectorFormat vform,
3868                       LogicVRegister dst,
3869                       const LogicVRegister& src1,
3870                       const LogicVRegister& src2);
// Predicated two-source histogram helper; do_segmented defaults to false.
3871   LogicVRegister histogram(VectorFormat vform,
3872                            LogicVRegister dst,
3873                            const LogicPRegister& pg,
3874                            const LogicVRegister& src1,
3875                            const LogicVRegister& src2,
3876                            bool do_segmented = false);
// Element generation, insertion and duplication helpers.
//
// index takes a start value and a step, both uint64_t.
3877   LogicVRegister index(VectorFormat vform,
3878                        LogicVRegister dst,
3879                        uint64_t start,
3880                        uint64_t step);
// Insertion: ins_element names both a destination and a source lane index;
// ins_immediate names a destination lane index and a uint64_t immediate; insr
// takes only an immediate.
3881   LogicVRegister ins_element(VectorFormat vform,
3882                              LogicVRegister dst,
3883                              int dst_index,
3884                              const LogicVRegister& src,
3885                              int src_index);
3886   LogicVRegister ins_immediate(VectorFormat vform,
3887                                LogicVRegister dst,
3888                                int dst_index,
3889                                uint64_t imm);
3890   LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm);
// Duplication: dup_element and the first dup_elements_to_segments overload
// take a source register and an int src_index; dup_immediate takes a uint64_t.
3891   LogicVRegister dup_element(VectorFormat vform,
3892                              LogicVRegister dst,
3893                              const LogicVRegister& src,
3894                              int src_index);
3895   LogicVRegister dup_elements_to_segments(VectorFormat vform,
3896                                           LogicVRegister dst,
3897                                           const LogicVRegister& src,
3898                                           int src_index);
// Overload taking the source and index packed into a std::pair<int, int>.
// NOTE(review): the first member is presumably a register number rather than
// a register object -- confirm against the callers.
3899   LogicVRegister dup_elements_to_segments(
3900       VectorFormat vform,
3901       LogicVRegister dst,
3902       const std::pair<int, int>& src_and_index);
3903   LogicVRegister dup_immediate(VectorFormat vform,
3904                                LogicVRegister dst,
3905                                uint64_t imm);
// Move helpers. Overloads exist for vector registers (taking a VectorFormat
// and returning LogicVRegister) and for predicate registers (no VectorFormat,
// returning LogicPRegister).
3906   LogicVRegister mov(VectorFormat vform,
3907                      LogicVRegister dst,
3908                      const LogicVRegister& src);
3909   LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src);
// The vector mov_merging/mov_zeroing overloads take the predicate as a
// SimPRegister, whereas the predicate-register overloads further down take it
// as a LogicPRegister.
3910   LogicVRegister mov_merging(VectorFormat vform,
3911                              LogicVRegister dst,
3912                              const SimPRegister& pg,
3913                              const LogicVRegister& src);
3914   LogicVRegister mov_zeroing(VectorFormat vform,
3915                              LogicVRegister dst,
3916                              const SimPRegister& pg,
3917                              const LogicVRegister& src);
// NOTE(review): start_at presumably selects the first lane of an every-other-
// lane copy -- confirm in the implementation file.
3918   LogicVRegister mov_alternating(VectorFormat vform,
3919                                  LogicVRegister dst,
3920                                  const LogicVRegister& src,
3921                                  int start_at);
3922   LogicPRegister mov_merging(LogicPRegister dst,
3923                              const LogicPRegister& pg,
3924                              const LogicPRegister& src);
3925   LogicPRegister mov_zeroing(LogicPRegister dst,
3926                              const LogicPRegister& pg,
3927                              const LogicPRegister& src);
// Immediate forms: each takes a uint64_t imm; orr additionally takes a source
// vector.
3928   LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);
3929   LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);
3930   LogicVRegister orr(VectorFormat vform,
3931                      LogicVRegister dst,
3932                      const LogicVRegister& src,
3933                      uint64_t imm);
// Shifts whose second operand is a vector (src2). sshl and ushl take a
// shift_is_8bit flag that defaults to true; sshr and ushr have no such flag.
// Separate sshr/ushr overloads taking an int shift are declared later in this
// class.
// NOTE(review): shift_is_8bit presumably restricts the shift amount to the low
// byte of each src2 lane -- confirm in the implementation file.
3934   LogicVRegister sshl(VectorFormat vform,
3935                       LogicVRegister dst,
3936                       const LogicVRegister& src1,
3937                       const LogicVRegister& src2,
3938                       bool shift_is_8bit = true);
3939   LogicVRegister ushl(VectorFormat vform,
3940                       LogicVRegister dst,
3941                       const LogicVRegister& src1,
3942                       const LogicVRegister& src2,
3943                       bool shift_is_8bit = true);
3944   LogicVRegister sshr(VectorFormat vform,
3945                       LogicVRegister dst,
3946                       const LogicVRegister& src1,
3947                       const LogicVRegister& src2);
3948   LogicVRegister ushr(VectorFormat vform,
3949                       LogicVRegister dst,
3950                       const LogicVRegister& src1,
3951                       const LogicVRegister& src2);
3952   // Perform a "conditional last" operation. The first part of the pair is true
3953   // if any predicate lane is active, false otherwise. The second part takes the
3954   // value of the last active (plus offset) lane, or last (plus offset) lane if
3955   // none active.
// Unlike most helpers here, this returns a std::pair<bool, uint64_t> rather
// than a register and takes no destination register.
// NOTE(review): the only source vector is named src2 although there is no
// src1 parameter -- check whether the name is intentional.
3956   std::pair<bool, uint64_t> clast(VectorFormat vform,
3957                                   const LogicPRegister& pg,
3958                                   const LogicVRegister& src2,
3959                                   int offset_from_last_active);
// match writes a predicate: it returns a LogicPRegister and takes `haystack`
// and `needles` vectors plus a negate_match flag.
// NOTE(review): negate_match presumably selects the "no match" variant --
// confirm in the implementation file.
3960   LogicPRegister match(VectorFormat vform,
3961                        LogicPRegister dst,
3962                        const LogicVRegister& haystack,
3963                        const LogicVRegister& needles,
3964                        bool negate_match);
// Predicate-driven vector helpers. compact and splice take the predicate as a
// LogicPRegister; the vector sel takes it as a SimPRegister.
3965   LogicVRegister compact(VectorFormat vform,
3966                          LogicVRegister dst,
3967                          const LogicPRegister& pg,
3968                          const LogicVRegister& src);
3969   LogicVRegister splice(VectorFormat vform,
3970                         LogicVRegister dst,
3971                         const LogicPRegister& pg,
3972                         const LogicVRegister& src1,
3973                         const LogicVRegister& src2);
3974   LogicVRegister sel(VectorFormat vform,
3975                      LogicVRegister dst,
3976                      const SimPRegister& pg,
3977                      const LogicVRegister& src1,
3978                      const LogicVRegister& src2);
// Predicate-register overload of sel: no VectorFormat, all operands are
// LogicPRegister.
3979   LogicPRegister sel(LogicPRegister dst,
3980                      const LogicPRegister& pg,
3981                      const LogicPRegister& src1,
3982                      const LogicPRegister& src2);
// Signed minimum/maximum helpers. sminmax and sminmaxp are the general forms
// and take a bool `max`; smax/smin and smaxp/sminp take no flag.
// NOTE(review): presumably max == true selects the maximum and the named
// variants forward to the general form -- confirm in the implementation file.
3983   LogicVRegister sminmax(VectorFormat vform,
3984                          LogicVRegister dst,
3985                          const LogicVRegister& src1,
3986                          const LogicVRegister& src2,
3987                          bool max);
3988   LogicVRegister smax(VectorFormat vform,
3989                       LogicVRegister dst,
3990                       const LogicVRegister& src1,
3991                       const LogicVRegister& src2);
3992   LogicVRegister smin(VectorFormat vform,
3993                       LogicVRegister dst,
3994                       const LogicVRegister& src1,
3995                       const LogicVRegister& src2);
3996   LogicVRegister sminmaxp(VectorFormat vform,
3997                           LogicVRegister dst,
3998                           const LogicVRegister& src1,
3999                           const LogicVRegister& src2,
4000                           bool max);
4001   LogicVRegister smaxp(VectorFormat vform,
4002                        LogicVRegister dst,
4003                        const LogicVRegister& src1,
4004                        const LogicVRegister& src2);
4005   LogicVRegister sminp(VectorFormat vform,
4006                        LogicVRegister dst,
4007                        const LogicVRegister& src1,
4008                        const LogicVRegister& src2);
// Single-source add helpers; each takes (vform, dst, src).
// NOTE(review): addv/uaddlv/saddlv presumably reduce across the whole vector
// and addp adds adjacent pairs -- confirm in the implementation file.
4009   LogicVRegister addp(VectorFormat vform,
4010                       LogicVRegister dst,
4011                       const LogicVRegister& src);
4012   LogicVRegister addv(VectorFormat vform,
4013                       LogicVRegister dst,
4014                       const LogicVRegister& src);
4015   LogicVRegister uaddlv(VectorFormat vform,
4016                         LogicVRegister dst,
4017                         const LogicVRegister& src);
4018   LogicVRegister saddlv(VectorFormat vform,
4019                         LogicVRegister dst,
4020                         const LogicVRegister& src);
// sminmaxv is the general signed form: it takes a predicate pg and a bool
// `max`. smaxv and sminv take neither.
4021   LogicVRegister sminmaxv(VectorFormat vform,
4022                           LogicVRegister dst,
4023                           const LogicPRegister& pg,
4024                           const LogicVRegister& src,
4025                           bool max);
4026   LogicVRegister smaxv(VectorFormat vform,
4027                        LogicVRegister dst,
4028                        const LogicVRegister& src);
4029   LogicVRegister sminv(VectorFormat vform,
4030                        LogicVRegister dst,
4031                        const LogicVRegister& src);
// Extension helpers. uxtl and sxtl take an optional is_2 flag (default false);
// uxtl2 and sxtl2 take no flag.
// NOTE(review): the *2 variants presumably select the upper half of src by
// passing is_2 = true -- confirm in the implementation file.
4032   LogicVRegister uxtl(VectorFormat vform,
4033                       LogicVRegister dst,
4034                       const LogicVRegister& src,
4035                       bool is_2 = false);
4036   LogicVRegister uxtl2(VectorFormat vform,
4037                        LogicVRegister dst,
4038                        const LogicVRegister& src);
4039   LogicVRegister sxtl(VectorFormat vform,
4040                       LogicVRegister dst,
4041                       const LogicVRegister& src,
4042                       bool is_2 = false);
4043   LogicVRegister sxtl2(VectorFormat vform,
4044                        LogicVRegister dst,
4045                        const LogicVRegister& src);
// uxt and sxt take the width to extend from as an unsigned bit count
// (from_size_in_bits).
4046   LogicVRegister uxt(VectorFormat vform,
4047                      LogicVRegister dst,
4048                      const LogicVRegister& src,
4049                      unsigned from_size_in_bits);
4050   LogicVRegister sxt(VectorFormat vform,
4051                      LogicVRegister dst,
4052                      const LogicVRegister& src,
4053                      unsigned from_size_in_bits);
// tbl overloads for one, two, three or four table registers (tab .. tab4).
// The index vector `ind` is always the last argument.
4054   LogicVRegister tbl(VectorFormat vform,
4055                      LogicVRegister dst,
4056                      const LogicVRegister& tab,
4057                      const LogicVRegister& ind);
4058   LogicVRegister tbl(VectorFormat vform,
4059                      LogicVRegister dst,
4060                      const LogicVRegister& tab,
4061                      const LogicVRegister& tab2,
4062                      const LogicVRegister& ind);
4063   LogicVRegister tbl(VectorFormat vform,
4064                      LogicVRegister dst,
4065                      const LogicVRegister& tab,
4066                      const LogicVRegister& tab2,
4067                      const LogicVRegister& tab3,
4068                      const LogicVRegister& ind);
4069   LogicVRegister tbl(VectorFormat vform,
4070                      LogicVRegister dst,
4071                      const LogicVRegister& tab,
4072                      const LogicVRegister& tab2,
4073                      const LogicVRegister& tab3,
4074                      const LogicVRegister& tab4,
4075                      const LogicVRegister& ind);
// General table-lookup declaration. Takes the index vector `ind`, a
// zero_out_of_bounds flag and between one and four table registers passed by
// pointer; tab2, tab3 and tab4 are optional and default to nullptr.
// NOTE(review): presumably tbl passes zero_out_of_bounds = true and tbx passes
// false -- confirm in the implementation file.
4076   LogicVRegister Table(VectorFormat vform,
4077                        LogicVRegister dst,
4078                        const LogicVRegister& ind,
4079                        bool zero_out_of_bounds,
4080                        const LogicVRegister* tab1,
4081                        const LogicVRegister* tab2 = nullptr,
4082                        const LogicVRegister* tab3 = nullptr,
4083                        const LogicVRegister* tab4 = nullptr);
// tbx overloads for one, two, three or four table registers (tab .. tab4),
// mirroring the tbl overload set. The index vector `ind` is always the last
// argument.
4084   LogicVRegister tbx(VectorFormat vform,
4085                      LogicVRegister dst,
4086                      const LogicVRegister& tab,
4087                      const LogicVRegister& ind);
4088   LogicVRegister tbx(VectorFormat vform,
4089                      LogicVRegister dst,
4090                      const LogicVRegister& tab,
4091                      const LogicVRegister& tab2,
4092                      const LogicVRegister& ind);
4093   LogicVRegister tbx(VectorFormat vform,
4094                      LogicVRegister dst,
4095                      const LogicVRegister& tab,
4096                      const LogicVRegister& tab2,
4097                      const LogicVRegister& tab3,
4098                      const LogicVRegister& ind);
4099   LogicVRegister tbx(VectorFormat vform,
4100                      LogicVRegister dst,
4101                      const LogicVRegister& tab,
4102                      const LogicVRegister& tab2,
4103                      const LogicVRegister& tab3,
4104                      const LogicVRegister& tab4,
4105                      const LogicVRegister& ind);
// Unsigned (u*) and signed (s*) add helpers in "l" and "w" flavours, each with
// a "2" variant. All eight take (vform, dst, src1, src2).
// NOTE(review): the names follow the AArch64 UADDL/UADDW/SADDL/SADDW
// mnemonics and their "2" (upper-half) forms -- confirm the semantics in the
// implementation file.
4106   LogicVRegister uaddl(VectorFormat vform,
4107                        LogicVRegister dst,
4108                        const LogicVRegister& src1,
4109                        const LogicVRegister& src2);
4110   LogicVRegister uaddl2(VectorFormat vform,
4111                         LogicVRegister dst,
4112                         const LogicVRegister& src1,
4113                         const LogicVRegister& src2);
4114   LogicVRegister uaddw(VectorFormat vform,
4115                        LogicVRegister dst,
4116                        const LogicVRegister& src1,
4117                        const LogicVRegister& src2);
4118   LogicVRegister uaddw2(VectorFormat vform,
4119                         LogicVRegister dst,
4120                         const LogicVRegister& src1,
4121                         const LogicVRegister& src2);
4122   LogicVRegister saddl(VectorFormat vform,
4123                        LogicVRegister dst,
4124                        const LogicVRegister& src1,
4125                        const LogicVRegister& src2);
4126   LogicVRegister saddl2(VectorFormat vform,
4127                         LogicVRegister dst,
4128                         const LogicVRegister& src1,
4129                         const LogicVRegister& src2);
4130   LogicVRegister saddw(VectorFormat vform,
4131                        LogicVRegister dst,
4132                        const LogicVRegister& src1,
4133                        const LogicVRegister& src2);
4134   LogicVRegister saddw2(VectorFormat vform,
4135                         LogicVRegister dst,
4136                         const LogicVRegister& src1,
4137                         const LogicVRegister& src2);
// Unsigned (u*) and signed (s*) subtract helpers in "l" and "w" flavours, each
// with a "2" variant. All eight take (vform, dst, src1, src2), matching the
// add helpers declared just above.
// NOTE(review): the names follow the AArch64 USUBL/USUBW/SSUBL/SSUBW
// mnemonics and their "2" (upper-half) forms -- confirm the semantics in the
// implementation file.
4138   LogicVRegister usubl(VectorFormat vform,
4139                        LogicVRegister dst,
4140                        const LogicVRegister& src1,
4141                        const LogicVRegister& src2);
4142   LogicVRegister usubl2(VectorFormat vform,
4143                         LogicVRegister dst,
4144                         const LogicVRegister& src1,
4145                         const LogicVRegister& src2);
4146   LogicVRegister usubw(VectorFormat vform,
4147                        LogicVRegister dst,
4148                        const LogicVRegister& src1,
4149                        const LogicVRegister& src2);
4150   LogicVRegister usubw2(VectorFormat vform,
4151                         LogicVRegister dst,
4152                         const LogicVRegister& src1,
4153                         const LogicVRegister& src2);
4154   LogicVRegister ssubl(VectorFormat vform,
4155                        LogicVRegister dst,
4156                        const LogicVRegister& src1,
4157                        const LogicVRegister& src2);
4158   LogicVRegister ssubl2(VectorFormat vform,
4159                         LogicVRegister dst,
4160                         const LogicVRegister& src1,
4161                         const LogicVRegister& src2);
4162   LogicVRegister ssubw(VectorFormat vform,
4163                        LogicVRegister dst,
4164                        const LogicVRegister& src1,
4165                        const LogicVRegister& src2);
4166   LogicVRegister ssubw2(VectorFormat vform,
4167                         LogicVRegister dst,
4168                         const LogicVRegister& src1,
4169                         const LogicVRegister& src2);
// Unsigned minimum/maximum helpers, parallel to the signed set. uminmax and
// uminmaxp are the two-source general forms and take a bool `max`; umax/umin
// and umaxp/uminp take no flag.
// NOTE(review): presumably max == true selects the maximum and the named
// variants forward to the general form -- confirm in the implementation file.
4170   LogicVRegister uminmax(VectorFormat vform,
4171                          LogicVRegister dst,
4172                          const LogicVRegister& src1,
4173                          const LogicVRegister& src2,
4174                          bool max);
4175   LogicVRegister umax(VectorFormat vform,
4176                       LogicVRegister dst,
4177                       const LogicVRegister& src1,
4178                       const LogicVRegister& src2);
4179   LogicVRegister umin(VectorFormat vform,
4180                       LogicVRegister dst,
4181                       const LogicVRegister& src1,
4182                       const LogicVRegister& src2);
4183   LogicVRegister uminmaxp(VectorFormat vform,
4184                           LogicVRegister dst,
4185                           const LogicVRegister& src1,
4186                           const LogicVRegister& src2,
4187                           bool max);
4188   LogicVRegister umaxp(VectorFormat vform,
4189                        LogicVRegister dst,
4190                        const LogicVRegister& src1,
4191                        const LogicVRegister& src2);
4192   LogicVRegister uminp(VectorFormat vform,
4193                        LogicVRegister dst,
4194                        const LogicVRegister& src1,
4195                        const LogicVRegister& src2);
// uminmaxv is the single-source general form: it takes a predicate pg and a
// bool `max`. umaxv and uminv take neither.
4196   LogicVRegister uminmaxv(VectorFormat vform,
4197                           LogicVRegister dst,
4198                           const LogicPRegister& pg,
4199                           const LogicVRegister& src,
4200                           bool max);
4201   LogicVRegister umaxv(VectorFormat vform,
4202                        LogicVRegister dst,
4203                        const LogicVRegister& src);
4204   LogicVRegister uminv(VectorFormat vform,
4205                        LogicVRegister dst,
4206                        const LogicVRegister& src);
// Two-source permute helpers. All six share the (vform, dst, src1, src2)
// shape and return a LogicVRegister.
// NOTE(review): the names follow the TRN1/TRN2, ZIP1/ZIP2 and UZP1/UZP2
// mnemonics -- confirm lane ordering in the implementation file.
4207   LogicVRegister trn1(VectorFormat vform,
4208                       LogicVRegister dst,
4209                       const LogicVRegister& src1,
4210                       const LogicVRegister& src2);
4211   LogicVRegister trn2(VectorFormat vform,
4212                       LogicVRegister dst,
4213                       const LogicVRegister& src1,
4214                       const LogicVRegister& src2);
4215   LogicVRegister zip1(VectorFormat vform,
4216                       LogicVRegister dst,
4217                       const LogicVRegister& src1,
4218                       const LogicVRegister& src2);
4219   LogicVRegister zip2(VectorFormat vform,
4220                       LogicVRegister dst,
4221                       const LogicVRegister& src1,
4222                       const LogicVRegister& src2);
4223   LogicVRegister uzp1(VectorFormat vform,
4224                       LogicVRegister dst,
4225                       const LogicVRegister& src1,
4226                       const LogicVRegister& src2);
4227   LogicVRegister uzp2(VectorFormat vform,
4228                       LogicVRegister dst,
4229                       const LogicVRegister& src1,
4230                       const LogicVRegister& src2);
// Shift helper taking the shift amount as a single int rather than as a
// second vector operand.
4231   LogicVRegister shl(VectorFormat vform,
4232                      LogicVRegister dst,
4233                      const LogicVRegister& src,
4234                      int shift);
// scvtf/ucvtf conversion helpers. Each has two overloads:
//  - a predicated form with explicit destination and source data sizes, an
//    FPRounding `round` and an optional fbits (default 0);
//  - an unpredicated form taking fbits and an FPRounding `rounding_mode`, with
//    no default for fbits.
// NOTE(review): the two forms order their trailing arguments differently,
// (round, fbits) versus (fbits, rounding_mode), so take care when switching
// between them.
// NOTE(review): scvtf names its size parameters *_in_bits while ucvtf does
// not; presumably both are bit counts -- confirm in the implementation file.
4235   LogicVRegister scvtf(VectorFormat vform,
4236                        unsigned dst_data_size_in_bits,
4237                        unsigned src_data_size_in_bits,
4238                        LogicVRegister dst,
4239                        const LogicPRegister& pg,
4240                        const LogicVRegister& src,
4241                        FPRounding round,
4242                        int fbits = 0);
4243   LogicVRegister scvtf(VectorFormat vform,
4244                        LogicVRegister dst,
4245                        const LogicVRegister& src,
4246                        int fbits,
4247                        FPRounding rounding_mode);
4248   LogicVRegister ucvtf(VectorFormat vform,
4249                        unsigned dst_data_size,
4250                        unsigned src_data_size,
4251                        LogicVRegister dst,
4252                        const LogicPRegister& pg,
4253                        const LogicVRegister& src,
4254                        FPRounding round,
4255                        int fbits = 0);
4256   LogicVRegister ucvtf(VectorFormat vform,
4257                        LogicVRegister dst,
4258                        const LogicVRegister& src,
4259                        int fbits,
4260                        FPRounding rounding_mode);
// Shift-left-long helpers. sshll/ushll and their "2" variants take an int
// shift; shll and shll2 take no shift argument.
// NOTE(review): shll/shll2 presumably shift by a fixed amount, and the "2"
// variants presumably read the upper half of src -- confirm in the
// implementation file.
4261   LogicVRegister sshll(VectorFormat vform,
4262                        LogicVRegister dst,
4263                        const LogicVRegister& src,
4264                        int shift);
4265   LogicVRegister sshll2(VectorFormat vform,
4266                         LogicVRegister dst,
4267                         const LogicVRegister& src,
4268                         int shift);
4269   LogicVRegister shll(VectorFormat vform,
4270                       LogicVRegister dst,
4271                       const LogicVRegister& src);
4272   LogicVRegister shll2(VectorFormat vform,
4273                        LogicVRegister dst,
4274                        const LogicVRegister& src);
4275   LogicVRegister ushll(VectorFormat vform,
4276                        LogicVRegister dst,
4277                        const LogicVRegister& src,
4278                        int shift);
4279   LogicVRegister ushll2(VectorFormat vform,
4280                         LogicVRegister dst,
4281                         const LogicVRegister& src,
4282                         int shift);
// Helpers taking a single int shift amount; all eight share the
// (vform, dst, src, shift) shape. The sshr/ushr declarations here overload the
// earlier sshr/ushr that take a second vector operand instead of an int.
// NOTE(review): sli/sri and the *sra forms presumably also read the incoming
// dst value (insert / accumulate) -- confirm in the implementation file.
4283   LogicVRegister sli(VectorFormat vform,
4284                      LogicVRegister dst,
4285                      const LogicVRegister& src,
4286                      int shift);
4287   LogicVRegister sri(VectorFormat vform,
4288                      LogicVRegister dst,
4289                      const LogicVRegister& src,
4290                      int shift);
4291   LogicVRegister sshr(VectorFormat vform,
4292                       LogicVRegister dst,
4293                       const LogicVRegister& src,
4294                       int shift);
4295   LogicVRegister ushr(VectorFormat vform,
4296                       LogicVRegister dst,
4297                       const LogicVRegister& src,
4298                       int shift);
4299   LogicVRegister ssra(VectorFormat vform,
4300                       LogicVRegister dst,
4301                       const LogicVRegister& src,
4302                       int shift);
4303   LogicVRegister usra(VectorFormat vform,
4304                       LogicVRegister dst,
4305                       const LogicVRegister& src,
4306                       int shift);
4307   LogicVRegister srsra(VectorFormat vform,
4308                        LogicVRegister dst,
4309                        const LogicVRegister& src,
4310                        int shift);
4311   LogicVRegister ursra(VectorFormat vform,
4312                        LogicVRegister dst,
4313                        const LogicVRegister& src,
4314                        int shift);
// suqadd and usqadd take two source vectors (src1, src2).
// NOTE(review): presumably saturating adds of mixed signedness -- confirm in
// the implementation file.
4315   LogicVRegister suqadd(VectorFormat vform,
4316                         LogicVRegister dst,
4317                         const LogicVRegister& src1,
4318                         const LogicVRegister& src2);
4319   LogicVRegister usqadd(VectorFormat vform,
4320                         LogicVRegister dst,
4321                         const LogicVRegister& src1,
4322                         const LogicVRegister& src2);
// sqshl, uqshl and sqshlu take one source vector and an int shift.
// NOTE(review): presumably saturating left shifts -- confirm.
4323   LogicVRegister sqshl(VectorFormat vform,
4324                        LogicVRegister dst,
4325                        const LogicVRegister& src,
4326                        int shift);
4327   LogicVRegister uqshl(VectorFormat vform,
4328                        LogicVRegister dst,
4329                        const LogicVRegister& src,
4330                        int shift);
4331   LogicVRegister sqshlu(VectorFormat vform,
4332                         LogicVRegister dst,
4333                         const LogicVRegister& src,
4334                         int shift);
// Unary helpers taking (vform, dst, src) and returning a LogicVRegister.
// NOTE(review): as a class member, abs is distinct from the global ::abs /
// std::abs; unqualified calls inside the class resolve to this overload.
4335   LogicVRegister abs(VectorFormat vform,
4336                      LogicVRegister dst,
4337                      const LogicVRegister& src);
4338   LogicVRegister neg(VectorFormat vform,
4339                      LogicVRegister dst,
4340                      const LogicVRegister& src);
// Narrowing helpers. extractnarrow is the general form: besides dst and src it
// takes dst_is_signed and src_is_signed flags (note that each flag follows the
// register it describes). xtn, sqxtn, uqxtn and sqxtun take only
// (vform, dst, src).
// NOTE(review): the named variants presumably forward to extractnarrow with
// fixed signedness flags -- confirm in the implementation file.
4341   LogicVRegister extractnarrow(VectorFormat vform,
4342                                LogicVRegister dst,
4343                                bool dst_is_signed,
4344                                const LogicVRegister& src,
4345                                bool src_is_signed);
4346   LogicVRegister xtn(VectorFormat vform,
4347                      LogicVRegister dst,
4348                      const LogicVRegister& src);
4349   LogicVRegister sqxtn(VectorFormat vform,
4350                        LogicVRegister dst,
4351                        const LogicVRegister& src);
4352   LogicVRegister uqxtn(VectorFormat vform,
4353                        LogicVRegister dst,
4354                        const LogicVRegister& src);
4355   LogicVRegister sqxtun(VectorFormat vform,
4356                         LogicVRegister dst,
4357                         const LogicVRegister& src);
// Two-source helpers. absdiff takes an is_signed flag; saba and uaba take
// none.
// NOTE(review): presumably absdiff computes a per-lane absolute difference
// and saba/uaba accumulate it into dst -- confirm in the implementation file.
4358   LogicVRegister absdiff(VectorFormat vform,
4359                          LogicVRegister dst,
4360                          const LogicVRegister& src1,
4361                          const LogicVRegister& src2,
4362                          bool is_signed);
4363   LogicVRegister saba(VectorFormat vform,
4364                       LogicVRegister dst,
4365                       const LogicVRegister& src1,
4366                       const LogicVRegister& src2);
4367   LogicVRegister uaba(VectorFormat vform,
4368                       LogicVRegister dst,
4369                       const LogicVRegister& src1,
4370                       const LogicVRegister& src2);
// Sixteen helpers sharing the (vform, dst, src, shift) shape with an int
// shift, each base name paired with a "2" variant.
// NOTE(review): the names follow the AArch64 shift-right-narrow mnemonics:
// an "r" presumably means rounding, a "uq"/"sq" prefix saturating, a trailing
// "un" signed-to-unsigned saturation, and "2" the upper-half form -- confirm
// in the implementation file.
4371   LogicVRegister shrn(VectorFormat vform,
4372                       LogicVRegister dst,
4373                       const LogicVRegister& src,
4374                       int shift);
4375   LogicVRegister shrn2(VectorFormat vform,
4376                        LogicVRegister dst,
4377                        const LogicVRegister& src,
4378                        int shift);
4379   LogicVRegister rshrn(VectorFormat vform,
4380                        LogicVRegister dst,
4381                        const LogicVRegister& src,
4382                        int shift);
4383   LogicVRegister rshrn2(VectorFormat vform,
4384                         LogicVRegister dst,
4385                         const LogicVRegister& src,
4386                         int shift);
4387   LogicVRegister uqshrn(VectorFormat vform,
4388                         LogicVRegister dst,
4389                         const LogicVRegister& src,
4390                         int shift);
4391   LogicVRegister uqshrn2(VectorFormat vform,
4392                          LogicVRegister dst,
4393                          const LogicVRegister& src,
4394                          int shift);
4395   LogicVRegister uqrshrn(VectorFormat vform,
4396                          LogicVRegister dst,
4397                          const LogicVRegister& src,
4398                          int shift);
4399   LogicVRegister uqrshrn2(VectorFormat vform,
4400                           LogicVRegister dst,
4401                           const LogicVRegister& src,
4402                           int shift);
4403   LogicVRegister sqshrn(VectorFormat vform,
4404                         LogicVRegister dst,
4405                         const LogicVRegister& src,
4406                         int shift);
4407   LogicVRegister sqshrn2(VectorFormat vform,
4408                          LogicVRegister dst,
4409                          const LogicVRegister& src,
4410                          int shift);
4411   LogicVRegister sqrshrn(VectorFormat vform,
4412                          LogicVRegister dst,
4413                          const LogicVRegister& src,
4414                          int shift);
4415   LogicVRegister sqrshrn2(VectorFormat vform,
4416                           LogicVRegister dst,
4417                           const LogicVRegister& src,
4418                           int shift);
4419   LogicVRegister sqshrun(VectorFormat vform,
4420                          LogicVRegister dst,
4421                          const LogicVRegister& src,
4422                          int shift);
4423   LogicVRegister sqshrun2(VectorFormat vform,
4424                           LogicVRegister dst,
4425                           const LogicVRegister& src,
4426                           int shift);
4427   LogicVRegister sqrshrun(VectorFormat vform,
4428                           LogicVRegister dst,
4429                           const LogicVRegister& src,
4430                           int shift);
4431   LogicVRegister sqrshrun2(VectorFormat vform,
4432                            LogicVRegister dst,
4433                            const LogicVRegister& src,
4434                            int shift);
// sqrdmulh: two-source vector helper with an optional `round` flag that
// defaults to true.
// NOTE(review): presumably round=false yields the non-rounding variant of the
// operation; confirm against the definition, which is not in this section.
4435   LogicVRegister sqrdmulh(VectorFormat vform,
4436                           LogicVRegister dst,
4437                           const LogicVRegister& src1,
4438                           const LogicVRegister& src2,
4439                           bool round = true);
// Dot-product helpers.
//  - dot():  takes an explicit signedness flag for each source operand.
//  - sdot(), udot(), usdot(): same operands without the flags.
//    NOTE(review): presumably thin wrappers that call dot() with fixed
//    signedness; verify in the definitions.
//  - cdot(): takes a separate accumulator (`acc`) and a rotation (`rot`); the
//    encoding/units of `rot` are not visible in this section.
4440   LogicVRegister dot(VectorFormat vform,
4441                      LogicVRegister dst,
4442                      const LogicVRegister& src1,
4443                      const LogicVRegister& src2,
4444                      bool is_src1_signed,
4445                      bool is_src2_signed);
4446   LogicVRegister sdot(VectorFormat vform,
4447                       LogicVRegister dst,
4448                       const LogicVRegister& src1,
4449                       const LogicVRegister& src2);
4450   LogicVRegister udot(VectorFormat vform,
4451                       LogicVRegister dst,
4452                       const LogicVRegister& src1,
4453                       const LogicVRegister& src2);
4454   LogicVRegister usdot(VectorFormat vform,
4455                        LogicVRegister dst,
4456                        const LogicVRegister& src1,
4457                        const LogicVRegister& src2);
4458   LogicVRegister cdot(VectorFormat vform,
4459                       LogicVRegister dst,
4460                       const LogicVRegister& acc,
4461                       const LogicVRegister& src1,
4462                       const LogicVRegister& src2,
4463                       int rot);
// Saturating doubling multiply helpers.
//  - sqrdcmlah(): two overloads. Both take an accumulator (`srca`), two
//    sources and a rotation (`rot`); the second additionally takes an element
//    `index`.
//  - sqrdmlash() / sqrdmlash_d(): share one parameter list; `round` defaults
//    to true and `sub_op` defaults to false.
//    NOTE(review): presumably `sub_op` selects subtract instead of accumulate
//    and the `_d` variant handles a different lane size; confirm in the
//    definitions.
//  - sqrdmlah() / sqrdmlsh(): same operands with only the `round` flag
//    (default true).
//  - sqdmulh(): two sources, no flags.
4464   LogicVRegister sqrdcmlah(VectorFormat vform,
4465                            LogicVRegister dst,
4466                            const LogicVRegister& srca,
4467                            const LogicVRegister& src1,
4468                            const LogicVRegister& src2,
4469                            int rot);
4470   LogicVRegister sqrdcmlah(VectorFormat vform,
4471                            LogicVRegister dst,
4472                            const LogicVRegister& srca,
4473                            const LogicVRegister& src1,
4474                            const LogicVRegister& src2,
4475                            int index,
4476                            int rot);
4477   LogicVRegister sqrdmlash(VectorFormat vform,
4478                            LogicVRegister dst,
4479                            const LogicVRegister& src1,
4480                            const LogicVRegister& src2,
4481                            bool round = true,
4482                            bool sub_op = false);
4483   LogicVRegister sqrdmlash_d(VectorFormat vform,
4484                              LogicVRegister dst,
4485                              const LogicVRegister& src1,
4486                              const LogicVRegister& src2,
4487                              bool round = true,
4488                              bool sub_op = false);
4489   LogicVRegister sqrdmlah(VectorFormat vform,
4490                           LogicVRegister dst,
4491                           const LogicVRegister& src1,
4492                           const LogicVRegister& src2,
4493                           bool round = true);
4494   LogicVRegister sqrdmlsh(VectorFormat vform,
4495                           LogicVRegister dst,
4496                           const LogicVRegister& src1,
4497                           const LogicVRegister& src2,
4498                           bool round = true);
4499   LogicVRegister sqdmulh(VectorFormat vform,
4500                          LogicVRegister dst,
4501                          const LogicVRegister& src1,
4502                          const LogicVRegister& src2);
// Matrix-multiply helpers.
//  - matmul(): the format argument is named `vform_dst`, i.e. it describes the
//    destination; each source has its own signedness flag.
//  - fmatmul(): declared twice with identical parameters, once as a
//    template over the element type T and once as a plain overload.
//    NOTE(review): the plain overload presumably dispatches on `vform` to the
//    matching T instantiation; confirm in the definition. The first register
//    parameter is named `srcdst`, suggesting it is both read and written.
4503   LogicVRegister matmul(VectorFormat vform_dst,
4504                         LogicVRegister dst,
4505                         const LogicVRegister& src1,
4506                         const LogicVRegister& src2,
4507                         bool src1_signed,
4508                         bool src2_signed);
4509   template <typename T>
4510   LogicVRegister fmatmul(VectorFormat vform,
4511                          LogicVRegister srcdst,
4512                          const LogicVRegister& src1,
4513                          const LogicVRegister& src2);
4514   LogicVRegister fmatmul(VectorFormat vform,
4515                          LogicVRegister srcdst,
4516                          const LogicVRegister& src1,
4517                          const LogicVRegister& src2);
4518 
4519   template <unsigned N>
4520   static void SHARotateEltsLeftOne(uint64_t (&x)[N]) {
4521     VIXL_STATIC_ASSERT(N == 4);
4522     uint64_t temp = x[3];
4523     x[3] = x[2];
4524     x[2] = x[1];
4525     x[1] = x[0];
4526     x[0] = temp;
4527   }
4528 
4529   template <uint32_t mode>
4530   LogicVRegister sha1(LogicVRegister srcdst,
4531                       const LogicVRegister& src1,
4532                       const LogicVRegister& src2) {
4533     uint64_t y = src1.Uint(kFormat4S, 0);
4534     uint64_t sd[4] = {};
4535     srcdst.UintArray(kFormat4S, sd);
4536 
4537     for (unsigned i = 0; i < ArrayLength(sd); i++) {
4538       uint64_t t = CryptoOp<mode>(sd[1], sd[2], sd[3]);
4539 
4540       y += RotateLeft(sd[0], 5, kSRegSize) + t;
4541       y += src2.Uint(kFormat4S, i);
4542 
4543       sd[1] = RotateLeft(sd[1], 30, kSRegSize);
4544 
4545       // y:sd = ROL(y:sd, 32)
4546       SHARotateEltsLeftOne(sd);
4547       std::swap(sd[0], y);
4548     }
4549 
4550     srcdst.SetUintArray(kFormat4S, sd);
4551     return srcdst;
4552   }
4553 
// SHA-2 family helpers. None takes a VectorFormat; each takes `srcdst` by
// value plus one or two source registers and returns a LogicVRegister.
// sha2h() additionally takes a `part1` flag.
// NOTE(review): names mirror the SHA256H/H2/SU0/SU1 and SHA512H/H2/SU0/SU1
// instructions, with `part1` presumably choosing between the H and H2 forms.
// Confirm in the definitions, which are not in this section.
4554   LogicVRegister sha2h(LogicVRegister srcdst,
4555                        const LogicVRegister& src1,
4556                        const LogicVRegister& src2,
4557                        bool part1);
4558   LogicVRegister sha2su0(LogicVRegister srcdst, const LogicVRegister& src1);
4559   LogicVRegister sha2su1(LogicVRegister srcdst,
4560                          const LogicVRegister& src1,
4561                          const LogicVRegister& src2);
4562   LogicVRegister sha512h(LogicVRegister srcdst,
4563                          const LogicVRegister& src1,
4564                          const LogicVRegister& src2);
4565   LogicVRegister sha512h2(LogicVRegister srcdst,
4566                           const LogicVRegister& src1,
4567                           const LogicVRegister& src2);
4568   LogicVRegister sha512su0(LogicVRegister srcdst, const LogicVRegister& src1);
4569   LogicVRegister sha512su1(LogicVRegister srcdst,
4570                            const LogicVRegister& src1,
4571                            const LogicVRegister& src2);
4572 
4573 
// AES helpers: each takes `srcdst`, one source register and a direction flag
// (`decrypt` for aes(), `inverse` for aesmix()).
// NOTE(review): presumably these cover the AESE/AESD and AESMC/AESIMC
// instruction pairs respectively; confirm in the definitions.
4574   LogicVRegister aes(LogicVRegister srcdst,
4575                      const LogicVRegister& src1,
4576                      bool decrypt);
4577   LogicVRegister aesmix(LogicVRegister srcdst,
4578                         const LogicVRegister& src1,
4579                         bool inverse);
4580 
// SM3 / SM4 helpers. None takes a VectorFormat.
//  - sm3partw1(), sm3partw2(): (dst, src1, src2).
//  - sm3ss1(): three source registers.
//  - sm3tt1(), sm3tt2(): operate on `srcdst` with an element `index` and an
//    `is_a` flag.
//    NOTE(review): `is_a` presumably selects the "A" versus "B" instruction
//    form; confirm in the definitions.
//  - sm4(): `is_key` flag.
//    NOTE(review): presumably selects the key-schedule variant rather than
//    the encryption variant; confirm in the definition.
4581   LogicVRegister sm3partw1(LogicVRegister dst,
4582                            const LogicVRegister& src1,
4583                            const LogicVRegister& src2);
4584   LogicVRegister sm3partw2(LogicVRegister dst,
4585                            const LogicVRegister& src1,
4586                            const LogicVRegister& src2);
4587   LogicVRegister sm3ss1(LogicVRegister dst,
4588                         const LogicVRegister& src1,
4589                         const LogicVRegister& src2,
4590                         const LogicVRegister& src3);
4591   LogicVRegister sm3tt1(LogicVRegister srcdst,
4592                         const LogicVRegister& src1,
4593                         const LogicVRegister& src2,
4594                         int index,
4595                         bool is_a);
4596   LogicVRegister sm3tt2(LogicVRegister srcdst,
4597                         const LogicVRegister& src1,
4598                         const LogicVRegister& src2,
4599                         int index,
4600                         bool is_a);
4601 
4602   LogicVRegister sm4(LogicVRegister dst,
4603                      const LogicVRegister& src1,
4604                      const LogicVRegister& src2,
4605                      bool is_key);
4606 
// X-macro list of three-register helper names. Applying
// NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC) below declares one member function
// per entry, each with the signature
//   LogicVRegister NAME(VectorFormat vform, LogicVRegister dst,
//                       const LogicVRegister& src1,
//                       const LogicVRegister& src2);
// DEFINE_LOGIC_FUNC is #undef'd immediately after use; the list macro itself
// stays defined.
4607 #define NEON_3VREG_LOGIC_LIST(V) \
4608   V(addhn)                       \
4609   V(addhn2)                      \
4610   V(raddhn)                      \
4611   V(raddhn2)                     \
4612   V(subhn)                       \
4613   V(subhn2)                      \
4614   V(rsubhn)                      \
4615   V(rsubhn2)                     \
4616   V(pmull)                       \
4617   V(pmull2)                      \
4618   V(sabal)                       \
4619   V(sabal2)                      \
4620   V(uabal)                       \
4621   V(uabal2)                      \
4622   V(sabdl)                       \
4623   V(sabdl2)                      \
4624   V(uabdl)                       \
4625   V(uabdl2)                      \
4626   V(smull2)                      \
4627   V(umull2)                      \
4628   V(smlal2)                      \
4629   V(umlal2)                      \
4630   V(smlsl2)                      \
4631   V(umlsl2)                      \
4632   V(sqdmlal2)                    \
4633   V(sqdmlsl2)                    \
4634   V(sqdmull2)
4635 
4636 #define DEFINE_LOGIC_FUNC(FXN)                   \
4637   LogicVRegister FXN(VectorFormat vform,         \
4638                      LogicVRegister dst,         \
4639                      const LogicVRegister& src1, \
4640                      const LogicVRegister& src2);
4641   NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
4642 #undef DEFINE_LOGIC_FUNC
4643 
// X-macro list of long-multiply helper names. DECLARE_NEON_MULL_OP declares
// one member per entry with the usual (vform, dst, src1, src2) operands plus a
// trailing `bool is_2` that defaults to false.
// NOTE(review): `is_2` presumably selects the upper-half ("2") form that
// NEON_3VREG_LOGIC_LIST declares as separate *2 functions; confirm in the
// definitions.
// DECLARE_NEON_MULL_OP is #undef'd right after it is applied.
4644 #define NEON_MULL_LIST(V) \
4645   V(smull)                \
4646   V(umull)                \
4647   V(smlal)                \
4648   V(umlal)                \
4649   V(smlsl)                \
4650   V(umlsl)                \
4651   V(sqdmlal)              \
4652   V(sqdmlsl)              \
4653   V(sqdmull)
4654 
4655 #define DECLARE_NEON_MULL_OP(FN)                \
4656   LogicVRegister FN(VectorFormat vform,         \
4657                     LogicVRegister dst,         \
4658                     const LogicVRegister& src1, \
4659                     const LogicVRegister& src2, \
4660                     bool is_2 = false);
4661   NEON_MULL_LIST(DECLARE_NEON_MULL_OP)
4662 #undef DECLARE_NEON_MULL_OP
4663 
// X-macro list of element-wise floating-point operations. Each entry is a
// triple (FN, OP, PROCNAN): the vector helper name, the name of a scalar FP
// routine, and a boolean.
// DECLARE_NEON_FP_VECTOR_OP uses only FN in this header: for every entry it
// declares a template <typename T> overload and a plain overload, both taking
// (vform, dst, src1, src2). OP and PROCNAN are not referenced by this
// expansion.
// NOTE(review): they are presumably consumed where the list is expanded to
// generate the definitions; PROCNAN looks like a NaN pre-processing switch.
// Confirm there.
4664 #define NEON_FP3SAME_LIST(V) \
4665   V(fadd, FPAdd, false)      \
4666   V(fsub, FPSub, true)       \
4667   V(fmul, FPMul, true)       \
4668   V(fmulx, FPMulx, true)     \
4669   V(fdiv, FPDiv, true)       \
4670   V(fmax, FPMax, false)      \
4671   V(fmin, FPMin, false)      \
4672   V(fmaxnm, FPMaxNM, false)  \
4673   V(fminnm, FPMinNM, false)
4674 
4675 #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
4676   template <typename T>                            \
4677   LogicVRegister FN(VectorFormat vform,            \
4678                     LogicVRegister dst,            \
4679                     const LogicVRegister& src1,    \
4680                     const LogicVRegister& src2);   \
4681   LogicVRegister FN(VectorFormat vform,            \
4682                     LogicVRegister dst,            \
4683                     const LogicVRegister& src1,    \
4684                     const LogicVRegister& src2);
4685   NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
4686 #undef DECLARE_NEON_FP_VECTOR_OP
4687 
// X-macro list of pairwise floating-point operations. Each entry is a triple
// (FNP, FN, OP): the pairwise helper name, the corresponding element-wise
// helper name, and a scalar FP routine name.
// DECLARE_NEON_FP_PAIR_OP uses only FNP in this header and declares two
// overloads per entry: a two-source form (vform, dst, src1, src2) and a
// single-source form (vform, dst, src). FN and OP are not referenced by this
// expansion.
// NOTE(review): they are presumably used when the list generates the
// definitions; confirm there.
4688 #define NEON_FPPAIRWISE_LIST(V) \
4689   V(faddp, fadd, FPAdd)         \
4690   V(fmaxp, fmax, FPMax)         \
4691   V(fmaxnmp, fmaxnm, FPMaxNM)   \
4692   V(fminp, fmin, FPMin)         \
4693   V(fminnmp, fminnm, FPMinNM)
4694 
4695 #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP)      \
4696   LogicVRegister FNP(VectorFormat vform,          \
4697                      LogicVRegister dst,          \
4698                      const LogicVRegister& src1,  \
4699                      const LogicVRegister& src2); \
4700   LogicVRegister FNP(VectorFormat vform,          \
4701                      LogicVRegister dst,          \
4702                      const LogicVRegister& src);
4703   NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
4704 #undef DECLARE_NEON_FP_PAIR_OP
4705 
// Mode selector accepted by frint() and by one FPRoundInt() overload.
// kFrintToInteger (0) is the default argument of frint(); the other two
// enumerators carry the values 32 and 64, matching the widths in their names.
// NOTE(review): presumably these restrict the rounded result to the int32 /
// int64 range; confirm in the definitions.
4706   enum FrintMode {
4707     kFrintToInteger = 0,
4708     kFrintToInt32 = 32,
4709     kFrintToInt64 = 64
4710   };
4711 
// Floating-point step and fused multiply helpers.
// frecps, frsqrts, fmla and fmls are each declared twice with identical
// parameters: a template <typename T> overload and a plain overload.
// NOTE(review): the plain overload presumably dispatches on `vform` to the
// matching T instantiation; confirm in the definitions.
// fmla/fmls take a separate accumulator (`srca`) in addition to the two
// multiplicands. fnmul has only the plain (vform, dst, src1, src2) form.
4712   template <typename T>
4713   LogicVRegister frecps(VectorFormat vform,
4714                         LogicVRegister dst,
4715                         const LogicVRegister& src1,
4716                         const LogicVRegister& src2);
4717   LogicVRegister frecps(VectorFormat vform,
4718                         LogicVRegister dst,
4719                         const LogicVRegister& src1,
4720                         const LogicVRegister& src2);
4721   template <typename T>
4722   LogicVRegister frsqrts(VectorFormat vform,
4723                          LogicVRegister dst,
4724                          const LogicVRegister& src1,
4725                          const LogicVRegister& src2);
4726   LogicVRegister frsqrts(VectorFormat vform,
4727                          LogicVRegister dst,
4728                          const LogicVRegister& src1,
4729                          const LogicVRegister& src2);
4730   template <typename T>
4731   LogicVRegister fmla(VectorFormat vform,
4732                       LogicVRegister dst,
4733                       const LogicVRegister& srca,
4734                       const LogicVRegister& src1,
4735                       const LogicVRegister& src2);
4736   LogicVRegister fmla(VectorFormat vform,
4737                       LogicVRegister dst,
4738                       const LogicVRegister& srca,
4739                       const LogicVRegister& src1,
4740                       const LogicVRegister& src2);
4741   template <typename T>
4742   LogicVRegister fmls(VectorFormat vform,
4743                       LogicVRegister dst,
4744                       const LogicVRegister& srca,
4745                       const LogicVRegister& src1,
4746                       const LogicVRegister& src2);
4747   LogicVRegister fmls(VectorFormat vform,
4748                       LogicVRegister dst,
4749                       const LogicVRegister& srca,
4750                       const LogicVRegister& src1,
4751                       const LogicVRegister& src2);
4752   LogicVRegister fnmul(VectorFormat vform,
4753                        LogicVRegister dst,
4754                        const LogicVRegister& src1,
4755                        const LogicVRegister& src2);
4756 
// fmlal / fmlal2 / fmlsl / fmlsl2: four helpers with the same
// (vform, dst, src1, src2) signature.
// NOTE(review): the names match the FMLAL/FMLSL widening multiply-accumulate
// instructions, with the "2" suffix presumably selecting the upper half of
// the sources; confirm in the definitions.
4757   LogicVRegister fmlal(VectorFormat vform,
4758                        LogicVRegister dst,
4759                        const LogicVRegister& src1,
4760                        const LogicVRegister& src2);
4761   LogicVRegister fmlal2(VectorFormat vform,
4762                         LogicVRegister dst,
4763                         const LogicVRegister& src1,
4764                         const LogicVRegister& src2);
4765   LogicVRegister fmlsl(VectorFormat vform,
4766                        LogicVRegister dst,
4767                        const LogicVRegister& src1,
4768                        const LogicVRegister& src2);
4769   LogicVRegister fmlsl2(VectorFormat vform,
4770                         LogicVRegister dst,
4771                         const LogicVRegister& src1,
4772                         const LogicVRegister& src2);
4773 
// Floating-point comparison helpers; each takes the Condition to test.
//  - fcmp(): template <typename T> overload plus a plain overload with the
//    same parameters, comparing src1 against src2.
//  - fabscmp(): same operands as fcmp().
//    NOTE(review): presumably compares absolute values; confirm in the
//    definition.
//  - fcmp_zero(): single source.
//    NOTE(review): presumably compares against zero; confirm in the
//    definition.
// The form in which results are written to `dst` is not visible in this
// section.
4774   template <typename T>
4775   LogicVRegister fcmp(VectorFormat vform,
4776                       LogicVRegister dst,
4777                       const LogicVRegister& src1,
4778                       const LogicVRegister& src2,
4779                       Condition cond);
4780   LogicVRegister fcmp(VectorFormat vform,
4781                       LogicVRegister dst,
4782                       const LogicVRegister& src1,
4783                       const LogicVRegister& src2,
4784                       Condition cond);
4785   LogicVRegister fabscmp(VectorFormat vform,
4786                          LogicVRegister dst,
4787                          const LogicVRegister& src1,
4788                          const LogicVRegister& src2,
4789                          Condition cond);
4790   LogicVRegister fcmp_zero(VectorFormat vform,
4791                            LogicVRegister dst,
4792                            const LogicVRegister& src,
4793                            Condition cond);
4794 
// Miscellaneous floating-point vector helpers.
// fneg, frecpx, fscale and fabs_ are each declared as a template
// <typename T> overload plus a plain overload with the same parameters.
// NOTE(review): the plain overload presumably dispatches on `vform`; confirm
// in the definitions.
// ftsmul, ftssel, fexpa, flogb and fabd have only plain overloads. ftmad
// additionally takes an unsigned `index`, whose meaning is not visible here.
4795   template <typename T>
4796   LogicVRegister fneg(VectorFormat vform,
4797                       LogicVRegister dst,
4798                       const LogicVRegister& src);
4799   LogicVRegister fneg(VectorFormat vform,
4800                       LogicVRegister dst,
4801                       const LogicVRegister& src);
4802   template <typename T>
4803   LogicVRegister frecpx(VectorFormat vform,
4804                         LogicVRegister dst,
4805                         const LogicVRegister& src);
4806   LogicVRegister frecpx(VectorFormat vform,
4807                         LogicVRegister dst,
4808                         const LogicVRegister& src);
4809   LogicVRegister ftsmul(VectorFormat vform,
4810                         LogicVRegister dst,
4811                         const LogicVRegister& src1,
4812                         const LogicVRegister& src2);
4813   LogicVRegister ftssel(VectorFormat vform,
4814                         LogicVRegister dst,
4815                         const LogicVRegister& src1,
4816                         const LogicVRegister& src2);
4817   LogicVRegister ftmad(VectorFormat vform,
4818                        LogicVRegister dst,
4819                        const LogicVRegister& src1,
4820                        const LogicVRegister& src2,
4821                        unsigned index);
4822   LogicVRegister fexpa(VectorFormat vform,
4823                        LogicVRegister dst,
4824                        const LogicVRegister& src);
4825   LogicVRegister flogb(VectorFormat vform,
4826                        LogicVRegister dst,
4827                        const LogicVRegister& src);
4828   template <typename T>
4829   LogicVRegister fscale(VectorFormat vform,
4830                         LogicVRegister dst,
4831                         const LogicVRegister& src1,
4832                         const LogicVRegister& src2);
4833   LogicVRegister fscale(VectorFormat vform,
4834                         LogicVRegister dst,
4835                         const LogicVRegister& src1,
4836                         const LogicVRegister& src2);
4837   template <typename T>
4838   LogicVRegister fabs_(VectorFormat vform,
4839                        LogicVRegister dst,
4840                        const LogicVRegister& src);
4841   LogicVRegister fabs_(VectorFormat vform,
4842                        LogicVRegister dst,
4843                        const LogicVRegister& src);
4844   LogicVRegister fabd(VectorFormat vform,
4845                       LogicVRegister dst,
4846                       const LogicVRegister& src1,
4847                       const LogicVRegister& src2);
// frint: rounds each element of `src` using `rounding_mode`.
// `inexact_exception` defaults to false and `frint_mode` defaults to
// kFrintToInteger, so existing three-operand callers get the plain
// round-to-integer mode.
// NOTE(review): how `inexact_exception` is acted on is not visible in this
// section; confirm in the definition.
4848   LogicVRegister frint(VectorFormat vform,
4849                        LogicVRegister dst,
4850                        const LogicVRegister& src,
4851                        FPRounding rounding_mode,
4852                        bool inexact_exception = false,
4853                        FrintMode frint_mode = kFrintToInteger);
// Floating-point conversion helpers.
//  - fcvt(): predicated (takes `pg`) conversion with separate destination and
//    source formats.
//  - fcvts() / fcvtu(): each has two overloads:
//      * a predicated form with explicit destination and source element sizes
//        in bits, a governing predicate, a rounding mode and `fbits`;
//      * an unpredicated form taking (vform, dst, src, rounding_mode, fbits).
//    `fbits` defaults to 0 in both forms.
//    NOTE(review): presumably `fbits` is the number of fractional bits of a
//    fixed-point result and the s/u suffix selects signed/unsigned output;
//    confirm in the definitions.
4854   LogicVRegister fcvt(VectorFormat dst_vform,
4855                       VectorFormat src_vform,
4856                       LogicVRegister dst,
4857                       const LogicPRegister& pg,
4858                       const LogicVRegister& src);
4859   LogicVRegister fcvts(VectorFormat vform,
4860                        unsigned dst_data_size_in_bits,
4861                        unsigned src_data_size_in_bits,
4862                        LogicVRegister dst,
4863                        const LogicPRegister& pg,
4864                        const LogicVRegister& src,
4865                        FPRounding round,
4866                        int fbits = 0);
4867   LogicVRegister fcvts(VectorFormat vform,
4868                        LogicVRegister dst,
4869                        const LogicVRegister& src,
4870                        FPRounding rounding_mode,
4871                        int fbits = 0);
4872   LogicVRegister fcvtu(VectorFormat vform,
4873                        unsigned dst_data_size_in_bits,
4874                        unsigned src_data_size_in_bits,
4875                        LogicVRegister dst,
4876                        const LogicPRegister& pg,
4877                        const LogicVRegister& src,
4878                        FPRounding round,
4879                        int fbits = 0);
4880   LogicVRegister fcvtu(VectorFormat vform,
4881                        LogicVRegister dst,
4882                        const LogicVRegister& src,
4883                        FPRounding rounding_mode,
4884                        int fbits = 0);
// Precision-changing conversions, all with the shape (vform, dst, src).
// NOTE(review): names match FCVTL (lengthen), FCVTN (narrow) and FCVTXN
// (narrow, round to odd), with the "2" suffix presumably selecting the upper
// half; confirm in the definitions, which are not in this section.
4885   LogicVRegister fcvtl(VectorFormat vform,
4886                        LogicVRegister dst,
4887                        const LogicVRegister& src);
4888   LogicVRegister fcvtl2(VectorFormat vform,
4889                         LogicVRegister dst,
4890                         const LogicVRegister& src);
4891   LogicVRegister fcvtn(VectorFormat vform,
4892                        LogicVRegister dst,
4893                        const LogicVRegister& src);
4894   LogicVRegister fcvtn2(VectorFormat vform,
4895                         LogicVRegister dst,
4896                         const LogicVRegister& src);
4897   LogicVRegister fcvtxn(VectorFormat vform,
4898                         LogicVRegister dst,
4899                         const LogicVRegister& src);
4900   LogicVRegister fcvtxn2(VectorFormat vform,
4901                          LogicVRegister dst,
4902                          const LogicVRegister& src);
// Square-root and reciprocal-estimate helpers, all (vform, dst, src) except
// frecpe(), which also takes an FPRounding argument.
// NOTE(review): the u-prefixed ursqrte/urecpe presumably operate on unsigned
// integer lanes rather than floating-point ones; confirm in the definitions.
4903   LogicVRegister fsqrt(VectorFormat vform,
4904                        LogicVRegister dst,
4905                        const LogicVRegister& src);
4906   LogicVRegister frsqrte(VectorFormat vform,
4907                          LogicVRegister dst,
4908                          const LogicVRegister& src);
4909   LogicVRegister frecpe(VectorFormat vform,
4910                         LogicVRegister dst,
4911                         const LogicVRegister& src,
4912                         FPRounding rounding);
4913   LogicVRegister ursqrte(VectorFormat vform,
4914                          LogicVRegister dst,
4915                          const LogicVRegister& src);
4916   LogicVRegister urecpe(VectorFormat vform,
4917                         LogicVRegister dst,
4918                         const LogicVRegister& src);
4919 
// Predicate-register helpers; each returns a LogicPRegister.
//  - pfalse(): takes only the destination.
//  - pfirst(): takes a governing predicate `pg` and a source predicate.
//  - ptrue():  takes a vector format and an integer `pattern`.
//  - pnext():  like pfirst() but also format-dependent.
// NOTE(review): names match the SVE PFALSE/PFIRST/PTRUE/PNEXT instructions;
// the encoding of `pattern` is not visible here. Confirm in the definitions.
4920   LogicPRegister pfalse(LogicPRegister dst);
4921   LogicPRegister pfirst(LogicPRegister dst,
4922                         const LogicPRegister& pg,
4923                         const LogicPRegister& src);
4924   LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern);
4925   LogicPRegister pnext(VectorFormat vform,
4926                        LogicPRegister dst,
4927                        const LogicPRegister& pg,
4928                        const LogicPRegister& src);
4929 
// asrd: shift helper taking one source vector and an immediate `shift`.
// NOTE(review): the name matches SVE ASRD (arithmetic shift right for
// divide); confirm rounding behaviour in the definition.
4930   LogicVRegister asrd(VectorFormat vform,
4931                       LogicVRegister dst,
4932                       const LogicVRegister& src1,
4933                       int shift);
4934 
// Predicated across-vector helpers sharing one shape:
//   (VectorFormat vform, LogicVRegister dst, const LogicPRegister& pg,
//    const LogicVRegister& src) -> LogicVRegister.
// NOTE(review): the names suggest bitwise (and/eor/or), add and min/max
// reductions in signed and unsigned flavours over the lanes selected by `pg`;
// confirm in the definitions, which are not in this section.
4935   LogicVRegister andv(VectorFormat vform,
4936                       LogicVRegister dst,
4937                       const LogicPRegister& pg,
4938                       const LogicVRegister& src);
4939   LogicVRegister eorv(VectorFormat vform,
4940                       LogicVRegister dst,
4941                       const LogicPRegister& pg,
4942                       const LogicVRegister& src);
4943   LogicVRegister orv(VectorFormat vform,
4944                      LogicVRegister dst,
4945                      const LogicPRegister& pg,
4946                      const LogicVRegister& src);
4947   LogicVRegister saddv(VectorFormat vform,
4948                        LogicVRegister dst,
4949                        const LogicPRegister& pg,
4950                        const LogicVRegister& src);
4951   LogicVRegister sminv(VectorFormat vform,
4952                        LogicVRegister dst,
4953                        const LogicPRegister& pg,
4954                        const LogicVRegister& src);
4955   LogicVRegister smaxv(VectorFormat vform,
4956                        LogicVRegister dst,
4957                        const LogicPRegister& pg,
4958                        const LogicVRegister& src);
4959   LogicVRegister uaddv(VectorFormat vform,
4960                        LogicVRegister dst,
4961                        const LogicPRegister& pg,
4962                        const LogicVRegister& src);
4963   LogicVRegister uminv(VectorFormat vform,
4964                        LogicVRegister dst,
4965                        const LogicPRegister& pg,
4966                        const LogicVRegister& src);
4967   LogicVRegister umaxv(VectorFormat vform,
4968                        LogicVRegister dst,
4969                        const LogicPRegister& pg,
4970                        const LogicVRegister& src);
4971 
// interleave_top_bottom: single-source permutation helper, (vform, dst, src).
// NOTE(review): the exact lane ordering produced is not visible in this
// section; see the definition before relying on it.
4972   LogicVRegister interleave_top_bottom(VectorFormat vform,
4973                                        LogicVRegister dst,
4974                                        const LogicVRegister& src);
4975 
4976   template <typename T>
4977   struct TFPPairOp {
4978     typedef T (Simulator::*type)(T a, T b);
4979   };
4980 
// FPPairedAcrossHelper: two overloads.
//  - The template form takes a single member-function pointer `fn` of type
//    TFPPairOp<T>::type and an `inactive_value`.
//  - The plain form takes one function pointer per element type (SimFloat16,
//    float, double) plus the same `inactive_value`.
// NOTE(review): the plain form presumably picks the pointer that matches
// `vform` and forwards to the template; how `inactive_value` is used is not
// visible in this section. Confirm in the definitions.
4981   template <typename T>
4982   LogicVRegister FPPairedAcrossHelper(VectorFormat vform,
4983                                       LogicVRegister dst,
4984                                       const LogicVRegister& src,
4985                                       typename TFPPairOp<T>::type fn,
4986                                       uint64_t inactive_value);
4987 
4988   LogicVRegister FPPairedAcrossHelper(
4989       VectorFormat vform,
4990       LogicVRegister dst,
4991       const LogicVRegister& src,
4992       typename TFPPairOp<vixl::internal::SimFloat16>::type fn16,
4993       typename TFPPairOp<float>::type fn32,
4994       typename TFPPairOp<double>::type fn64,
4995       uint64_t inactive_value);
4996 
// Unpredicated floating-point across-vector helpers, all (vform, dst, src).
// NOTE(review): names suggest min, max, minNM, maxNM and add reductions,
// presumably built on FPPairedAcrossHelper; confirm in the definitions.
4997   LogicVRegister fminv(VectorFormat vform,
4998                        LogicVRegister dst,
4999                        const LogicVRegister& src);
5000   LogicVRegister fmaxv(VectorFormat vform,
5001                        LogicVRegister dst,
5002                        const LogicVRegister& src);
5003   LogicVRegister fminnmv(VectorFormat vform,
5004                          LogicVRegister dst,
5005                          const LogicVRegister& src);
5006   LogicVRegister fmaxnmv(VectorFormat vform,
5007                          LogicVRegister dst,
5008                          const LogicVRegister& src);
5009   LogicVRegister faddv(VectorFormat vform,
5010                        LogicVRegister dst,
5011                        const LogicVRegister& src);
5012 
// CRC support.
// CRC32_POLY and CRC32C_POLY are the generator polynomials for the two
// checksum variants (0x04C11DB7 is the common CRC-32 polynomial and
// 0x1EDC6F41 the CRC-32C/Castagnoli one, both in MSB-first notation).
// Poly32Mod2() takes a bit count `n`, the data and a polynomial.
// Crc32Checksum() is declared as a template over the value type T and as a
// plain overload for uint64_t values; both take the running accumulator and
// the polynomial to use.
// NOTE(review): bit ordering/reflection conventions are not visible in this
// section; confirm in the definitions.
5013   static const uint32_t CRC32_POLY = 0x04C11DB7;
5014   static const uint32_t CRC32C_POLY = 0x1EDC6F41;
5015   uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
5016   template <typename T>
5017   uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
5018   uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
5019 
// SysOp_W: takes an integer operation code and a 64-bit value and returns a
// bool.
// NOTE(review): the meaning of the return value is not visible in this
// section; check the definition before relying on it.
5020   bool SysOp_W(int op, int64_t val);
5021 
// Generic (templated on the floating-point type T) estimate and conversion
// routines.
//  - FPRecipSqrtEstimate<T>(op)
//  - FPRecipEstimate<T>(op, rounding)
//  - FPToFixed<T, R>(op, fbits, is_signed, rounding): returns type R.
//    NOTE(review): presumably `fbits` is the number of fractional bits of the
//    fixed-point result; confirm in the definition.
5022   template <typename T>
5023   T FPRecipSqrtEstimate(T op);
5024   template <typename T>
5025   T FPRecipEstimate(T op, FPRounding rounding);
5026   template <typename T, typename R>
5027   R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
5028 
// Scalar floating-point helpers.
//  - FPCompare(): returns void.
//    NOTE(review): presumably records the comparison result in simulator
//    state (e.g. flags); confirm in the definition.
//  - FPRoundInt(): two overloads, with and without a FrintMode;
//    FPRoundIntCommon() has the two-argument shape as well.
//  - recip_sqrt_estimate()/recip_estimate() and the double-typed
//    FPRecipSqrtEstimate()/FPRecipEstimate(): operate on a plain double.
//  - [U]FixedTo{Double,Float,Float16}(): convert a signed/unsigned 64-bit
//    source with `fbits` and a rounding mode to the named FP type.
//  - FPTo[U]Int{16,32,64}(): convert a double to the named integer type using
//    `rmode`.
//  - FPToFixedJS(): takes only the value.
//    NOTE(review): presumably implements JavaScript-style conversion
//    (FJCVTZS); confirm in the definition.
5029   void FPCompare(double val0, double val1, FPTrapFlags trap);
5030   double FPRoundInt(double value, FPRounding round_mode);
5031   double FPRoundInt(double value, FPRounding round_mode, FrintMode frint_mode);
5032   double FPRoundIntCommon(double value, FPRounding round_mode);
5033   double recip_sqrt_estimate(double a);
5034   double recip_estimate(double a);
5035   double FPRecipSqrtEstimate(double a);
5036   double FPRecipEstimate(double a);
5037   double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
5038   double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
5039   float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
5040   float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
5041   ::vixl::internal::SimFloat16 FixedToFloat16(int64_t src,
5042                                               int fbits,
5043                                               FPRounding round_mode);
5044   ::vixl::internal::SimFloat16 UFixedToFloat16(uint64_t src,
5045                                                int fbits,
5046                                                FPRounding round_mode);
5047   int16_t FPToInt16(double value, FPRounding rmode);
5048   int32_t FPToInt32(double value, FPRounding rmode);
5049   int64_t FPToInt64(double value, FPRounding rmode);
5050   uint16_t FPToUInt16(double value, FPRounding rmode);
5051   uint32_t FPToUInt32(double value, FPRounding rmode);
5052   uint64_t FPToUInt64(double value, FPRounding rmode);
5053   int32_t FPToFixedJS(double value);
5054 
// Scalar floating-point primitives, each templated on the operand type T and
// returning T. Several of these names (FPAdd, FPSub, FPMul, FPMulx, FPDiv,
// FPMax, FPMin, FPMaxNM, FPMinNM) are the OP entries of NEON_FP3SAME_LIST and
// NEON_FPPAIRWISE_LIST.
// FPMulAdd takes the addend `a` first, followed by the two multiplicands.
// NOTE(review): NaN handling and rounding behaviour are defined elsewhere and
// are not visible in this section.
5055   template <typename T>
5056   T FPAdd(T op1, T op2);
5057 
5058   template <typename T>
5059   T FPNeg(T op);
5060 
5061   template <typename T>
5062   T FPDiv(T op1, T op2);
5063 
5064   template <typename T>
5065   T FPMax(T a, T b);
5066 
5067   template <typename T>
5068   T FPMaxNM(T a, T b);
5069 
5070   template <typename T>
5071   T FPMin(T a, T b);
5072 
5073   template <typename T>
5074   T FPMinNM(T a, T b);
5075 
5076   template <typename T>
5077   T FPMulNaNs(T op1, T op2);
5078 
5079   template <typename T>
5080   T FPMul(T op1, T op2);
5081 
5082   template <typename T>
5083   T FPMulx(T op1, T op2);
5084 
5085   template <typename T>
5086   T FPMulAdd(T a, T op1, T op2);
5087 
5088   template <typename T>
5089   T FPSqrt(T op);
5090 
5091   template <typename T>
5092   T FPSub(T op1, T op2);
5093 
5094   template <typename T>
5095   T FPRecipStepFused(T op1, T op2);
5096 
5097   template <typename T>
5098   T FPRSqrtStepFused(T op1, T op2);
5099 
// Placeholder hook: the body is intentionally empty, so calling it has no
// effect.
5100   // This doesn't do anything at the moment. We'll need it if we want support
5101   // for cumulative exception bits or floating-point exceptions.
5102   void FPProcessException() {}
5103 
// FPProcessNaNs: instruction-level NaN handling hook.
// NOTE(review): presumably returns true when a NaN operand was found and the
// result has already been produced, so the caller can skip the operation;
// confirm in the definition.
5104   bool FPProcessNaNs(const Instruction* instr);
5105 
5106   // Pseudo Printf instruction
5107   void DoPrintf(const Instruction* instr);
5108 
5109   // Pseudo-instructions to configure CPU features dynamically.
5110   void DoConfigureCPUFeatures(const Instruction* instr);
5111 
5112   void DoSaveCPUFeatures(const Instruction* instr);
5113   void DoRestoreCPUFeatures(const Instruction* instr);
5114 
  // General arithmetic helpers ----------------------------

  // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or
  // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must
  // be a valid int<n>_t.
  //
  // `n` is the width, in bits, of the accumulator field. Saturation is off by
  // default (`is_saturating = false`) and the value is treated as unsigned by
  // default (`is_signed = false`). NOTE(review): `is_signed` presumably
  // selects both the saturation bounds and sign- versus zero-extension of the
  // returned value - confirm against the definition.
  uint64_t IncDecN(uint64_t acc,
                   int64_t delta,
                   unsigned n,
                   bool is_saturating = false,
                   bool is_signed = false);
5125 
  // SVE helpers -------------------------------------------
  //
  // Shared helpers for simulating SVE instructions. They are only declared
  // here. The vector helpers take the destination register by value and
  // return a LogicVRegister; the predicate helpers do the same with
  // LogicPRegister.

  // Apply the bitwise logical operation `op` to `zn` and `zm`, writing `zd`.
  LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op,
                                                     VectorFormat vform,
                                                     LogicVRegister zd,
                                                     const LogicVRegister& zn,
                                                     const LogicVRegister& zm);

  // Apply the predicate logical operation `op` to `pn` and `pm`, writing `Pd`.
  LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
                                           LogicPRegister Pd,
                                           const LogicPRegister& pn,
                                           const LogicPRegister& pm);

  // Bitwise logical operation between `zd` and the immediate `imm`.
  LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,
                                     VectorFormat vform,
                                     LogicVRegister zd,
                                     uint64_t imm);
  // Selectors for unpk(): which half of the source vector to unpack, and
  // whether the unpacked elements are sign- or zero-extended.
  enum UnpackType { kHiHalf, kLoHalf };
  enum ExtendType { kSignedExtend, kUnsignedExtend };
  LogicVRegister unpk(VectorFormat vform,
                      LogicVRegister zd,
                      const LogicVRegister& zn,
                      UnpackType unpack_type,
                      ExtendType extend_type);

  // Compare `src1` with `src2` under condition `cc`, governed by `mask`, and
  // write the predicate result to `dst`. By default the elements are not
  // treated as wide (`is_wide_elements = false`) and flags are updated
  // (`flags = SetFlags`).
  LogicPRegister SVEIntCompareVectorsHelper(Condition cc,
                                            VectorFormat vform,
                                            LogicPRegister dst,
                                            const LogicPRegister& mask,
                                            const LogicVRegister& src1,
                                            const LogicVRegister& src2,
                                            bool is_wide_elements = false,
                                            FlagsUpdate flags = SetFlags);

  // Gather load with a scalar base plus vector offsets. NOTE(review): `mod`
  // presumably describes how the vector offsets are extended or scaled -
  // confirm against the definition.
  void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
                                           VectorFormat vform,
                                           SVEOffsetModifier mod);
5162 
  // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`.
  //
  // `zt_code` specifies the code of the first register (zt). Each additional
  // register (up to `reg_count`) is `(zt_code + i) % 32`.
  //
  // `pg` is the governing predicate that decides which lanes are active.
  //
  // This helper calls LogZWrite in the proper way, according to `addr`.
  void SVEStructuredStoreHelper(VectorFormat vform,
                                const LogicPRegister& pg,
                                unsigned zt_code,
                                const LogicSVEAddressVector& addr);
  // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
  // Returns false if a load failed.
  //
  // `is_signed` defaults to false. NOTE(review): it presumably selects sign-
  // rather than zero-extension of loaded elements - confirm against the
  // definition.
  bool SVEStructuredLoadHelper(VectorFormat vform,
                               const LogicPRegister& pg,
                               unsigned zt_code,
                               const LogicSVEAddressVector& addr,
                               bool is_signed = false);
5180 
  // Selects the fault-handling semantics used by SVEFaultTolerantLoadHelper().
  enum SVEFaultTolerantLoadType {
    // Non-fault load:
    //
    // - Elements active in both FFR and pg are accessed as usual. If the access
    //   fails, the corresponding lane and all subsequent lanes are filled with
    //   an unpredictable value, and made inactive in FFR.
    //
    // - Elements active in FFR but not pg are set to zero.
    //
    // - Elements that are not active in FFR are filled with an unpredictable
    //   value, regardless of pg.
    kSVENonFaultLoad,

    // First-fault load:
    //
    // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the
    // first active element is always accessed, regardless of FFR, and will
    // generate a real fault if it is inaccessible. If the lane is not active in
    // FFR, the actual value loaded into the result is still unpredictable.
    kSVEFirstFaultLoad
  };
5198 
  // Load with first-faulting or non-faulting load semantics, respecting and
  // updating FFR.
  //
  // `type` selects between the two behaviours described on
  // SVEFaultTolerantLoadType. `pg`, `zt_code` and `addr` have the same roles
  // as in SVEStructuredLoadHelper(). Unlike that helper, `is_signed` has no
  // default here.
  void SVEFaultTolerantLoadHelper(VectorFormat vform,
                                  const LogicPRegister& pg,
                                  unsigned zt_code,
                                  const LogicSVEAddressVector& addr,
                                  SVEFaultTolerantLoadType type,
                                  bool is_signed);

  // Shift the elements of `src1` by the amounts in `src2` using `shift_op`,
  // writing `dst`. NOTE(review): `is_wide_elements` presumably means that the
  // shift amounts are read from wider elements of `src2` - confirm against
  // the definition.
  LogicVRegister SVEBitwiseShiftHelper(Shift shift_op,
                                       VectorFormat vform,
                                       LogicVRegister dst,
                                       const LogicVRegister& src1,
                                       const LogicVRegister& src2,
                                       bool is_wide_elements);
5214 
  // Pack all even- or odd-numbered elements of the source vector side by side,
  // place them in the elements of the lower half of the destination vector,
  // and leave the upper half all zero.
  //
  // Even-numbered elements (A is element 0):
  //    [...| H | G | F | E | D | C | B | A ]
  // => [...................| G | E | C | A ]
  LogicVRegister pack_even_elements(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src);

  // Odd-numbered elements:
  //    [...| H | G | F | E | D | C | B | A ]
  // => [...................| H | F | D | B ]
  LogicVRegister pack_odd_elements(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src);

  // NOTE(review): presumably the SVE2 add-with-carry-long operation, with
  // `top` selecting the top (odd) rather than bottom (even) elements -
  // confirm against the definition.
  LogicVRegister adcl(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2,
                      bool top);

  // Helper templated on the floating-point type `T`. `coeff_pos` and
  // `coeff_neg` are passed as 64-bit integers. NOTE(review): presumably they
  // are addresses of (or encodings for) the coefficient tables used by FTMAD
  // for positive and negative inputs - confirm against the definition.
  template <typename T>
  LogicVRegister FTMaddHelper(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              uint64_t coeff_pos,
                              uint64_t coeff_neg);
5243 
  // Return the first or last active lane, or -1 if none are active.
  // Lanes are interpreted according to `vform`; `pg` is the predicate queried.
  int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const;
  int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const;

  // Count the lanes of `pg` that are active for the given `vform`.
  int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const;

  // Count active and true lanes in `pn`, i.e. lanes that are both active
  // according to the governing predicate `pg` and set in `pn`.
  int CountActiveAndTrueLanes(VectorFormat vform,
                              const LogicPRegister& pg,
                              const LogicPRegister& pn) const;

  // Count the number of lanes referred to by `pattern`, given the vector
  // length. If `pattern` is not a recognised SVEPredicateConstraint, this
  // returns zero.
  int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const;

  // Simulate a runtime call.
  void DoRuntimeCall(const Instruction* instr);
5262 
  // Processor state ---------------------------------------

  // Simulated monitors for exclusive access instructions.
  SimExclusiveLocalMonitor local_monitor_;
  SimExclusiveGlobalMonitor global_monitor_;

  // Output stream.
  FILE* stream_;
  // Disassembler used for printed output. Held as a raw pointer; ownership is
  // not visible from this part of the file.
  PrintDisassembler* print_disasm_;

  // General purpose registers. Register 31 is the stack pointer.
  SimRegister registers_[kNumberOfRegisters];

  // Vector registers
  SimVRegister vregisters_[kNumberOfVRegisters];

  // SVE predicate registers.
  SimPRegister pregisters_[kNumberOfPRegisters];

  // SVE first-fault register.
  SimFFRRegister ffr_register_;

  // A pseudo SVE predicate register with all bits set to true.
  SimPRegister pregister_all_true_;

  // Program Status Register.
  // bits[31, 27]: Condition flags N, Z, C, and V.
  //               (Negative, Zero, Carry, Overflow)
  // See also kConditionFlagsMask (0xf0000000).
  SimSystemRegister nzcv_;

  // Floating-Point Control Register
  SimSystemRegister fpcr_;
5295 
  // Only a subset of FPCR features are supported by the simulator. This helper
  // checks that the FPCR settings are supported.
  //
  // This is checked when floating-point instructions are executed, not when
  // FPCR is set. This allows generated code to modify FPCR for external
  // functions, or to save and restore it when entering and leaving generated
  // code.
  //
  // The supported configuration is: FZ == 0, RMode == FPTieEven and AHP == 0.
  // Each condition is checked with VIXL_ASSERT.
  void AssertSupportedFPCR() {
    // No flush-to-zero support.
    VIXL_ASSERT(ReadFpcr().GetFZ() == 0);
    // Ties-to-even rounding only.
    VIXL_ASSERT(ReadFpcr().GetRMode() == FPTieEven);
    // No alternative half-precision support.
    VIXL_ASSERT(ReadFpcr().GetAHP() == 0);
  }
5311 
5312   static int CalcNFlag(uint64_t result, unsigned reg_size) {
5313     return (result >> (reg_size - 1)) & 1;
5314   }
5315 
5316   static int CalcZFlag(uint64_t result) { return (result == 0) ? 1 : 0; }
5317 
  // Mask selecting the top four bits of a 32-bit value, where the N, Z, C and
  // V condition flags live.
  static const uint32_t kConditionFlagsMask = 0xf0000000;

  // Simulated memory.
  Memory memory_;

  // Default stack layout sizes, in bytes: no guard region at the start, a
  // 4KB guard region at the end, and 8KB of usable stack.
  static const size_t kDefaultStackGuardStartSize = 0;
  static const size_t kDefaultStackGuardEndSize = 4 * 1024;
  static const size_t kDefaultStackUsableSize = 8 * 1024;

  // Instruction decoder. Held as a raw pointer; ownership is not visible from
  // this part of the file.
  Decoder* decoder_;
  // Indicates if the pc has been modified by the instruction and should not be
  // automatically incremented.
  bool pc_modified_;
  // The simulated program counter.
  const Instruction* pc_;

  // Pointer to the last simulated instruction, used for checking the validity
  // of the current instruction with the previous instruction, such as movprfx.
  Instruction const* last_instr_;

  // Branch type register, used for branch target identification.
  BType btype_;

  // Next value of branch type register after the current instruction has been
  // decoded.
  BType next_btype_;

  // Global flag for enabling guarded pages.
  // TODO: implement guarding at page granularity, rather than globally.
  bool guard_pages_;

  // Register name tables, one per register view (X, W, B, H, S, D, V, Z, P).
  // The arrays are defined elsewhere.
  static const char* xreg_names[];
  static const char* wreg_names[];
  static const char* breg_names[];
  static const char* hreg_names[];
  static const char* sreg_names[];
  static const char* dreg_names[];
  static const char* vreg_names[];
  static const char* zreg_names[];
  static const char* preg_names[];
5356 
 private:
  // Map from a 32-bit key to the visitor function to run for it. Each visitor
  // receives the Simulator and the instruction being simulated.
  // NOTE(review): the key is presumably an instruction-form hash (compare
  // `form_hash_` below) - confirm against GetFormToVisitorFnMap().
  using FormToVisitorFnMap =
      std::unordered_map<uint32_t,
                         std::function<void(Simulator*, const Instruction*)>>;
  static const FormToVisitorFnMap* GetFormToVisitorFnMap();

  uint32_t form_hash_;

  // Pointer authentication keys.
  static const PACKey kPACKeyIA;
  static const PACKey kPACKeyIB;
  static const PACKey kPACKeyDA;
  static const PACKey kPACKeyDB;
  static const PACKey kPACKeyGA;

  // NOTE(review): presumably returns true if `size` bytes starting at
  // `address` can be read - confirm against the definition.
  bool CanReadMemory(uintptr_t address, size_t size);

#ifndef _WIN32
  // CanReadMemory needs placeholder file descriptors, so we use a pipe. We can
  // save some system call overhead by opening them on construction, rather than
  // on every call to CanReadMemory.
  int placeholder_pipe_fd_[2];
#endif

  // Return the default NaN for the floating-point type `T`. Only declared
  // here; FPProcessNaN() uses it when ReadDN() == kUseDefaultNaN.
  template <typename T>
  static T FPDefaultNaN();
5382 
5383   // Standard NaN processing.
5384   template <typename T>
5385   T FPProcessNaN(T op) {
5386     VIXL_ASSERT(IsNaN(op));
5387     if (IsSignallingNaN(op)) {
5388       FPProcessException();
5389     }
5390     return (ReadDN() == kUseDefaultNaN) ? FPDefaultNaN<T>() : ToQuietNaN(op);
5391   }
5392 
5393   template <typename T>
5394   T FPProcessNaNs(T op1, T op2) {
5395     if (IsSignallingNaN(op1)) {
5396       return FPProcessNaN(op1);
5397     } else if (IsSignallingNaN(op2)) {
5398       return FPProcessNaN(op2);
5399     } else if (IsNaN(op1)) {
5400       VIXL_ASSERT(IsQuietNaN(op1));
5401       return FPProcessNaN(op1);
5402     } else if (IsNaN(op2)) {
5403       VIXL_ASSERT(IsQuietNaN(op2));
5404       return FPProcessNaN(op2);
5405     } else {
5406       return 0.0;
5407     }
5408   }
5409 
5410   template <typename T>
5411   T FPProcessNaNs3(T op1, T op2, T op3) {
5412     if (IsSignallingNaN(op1)) {
5413       return FPProcessNaN(op1);
5414     } else if (IsSignallingNaN(op2)) {
5415       return FPProcessNaN(op2);
5416     } else if (IsSignallingNaN(op3)) {
5417       return FPProcessNaN(op3);
5418     } else if (IsNaN(op1)) {
5419       VIXL_ASSERT(IsQuietNaN(op1));
5420       return FPProcessNaN(op1);
5421     } else if (IsNaN(op2)) {
5422       VIXL_ASSERT(IsQuietNaN(op2));
5423       return FPProcessNaN(op2);
5424     } else if (IsNaN(op3)) {
5425       VIXL_ASSERT(IsQuietNaN(op3));
5426       return FPProcessNaN(op3);
5427     } else {
5428       return 0.0;
5429     }
5430   }
5431 
  // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of
  // the destination is set to all true (0xff) when the corresponding
  // predicate flag is set, and false (0x00) otherwise.
  SimVRegister ExpandToSimVRegister(const SimPRegister& preg);

  // Set each predicate flag in pd where the corresponding assigned-sized lane
  // in vreg is non-zero. Clear the flag, otherwise. This is almost the opposite
  // operation to ExpandToSimVRegister(), except that any non-zero lane is
  // interpreted as true.
  //
  // `pd` is an output parameter, and `vreg` is taken by value.
  // NOTE(review): the lane size is presumably the one given by `vform` -
  // confirm against the definition.
  void ExtractFromSimVRegister(VectorFormat vform,
                               SimPRegister& pd,  // NOLINT(runtime/references)
                               SimVRegister vreg);
5444 
  // NOTE(review): presumably selects coloured trace output - confirm against
  // the code that sets and reads it.
  bool coloured_trace_;

  // A set of TraceParameters flags.
  int trace_parameters_;

  // Indicates whether the exclusive-access warning has been printed.
  bool print_exclusive_access_warning_;
  void PrintExclusiveAccessWarning();

  // NOTE(review): `saved_cpu_features_` is presumably the stack used by
  // DoSaveCPUFeatures() / DoRestoreCPUFeatures() - confirm.
  CPUFeaturesAuditor cpu_features_auditor_;
  std::vector<CPUFeatures> saved_cpu_features_;

  // linear_congruential_engine, used to simulate randomness with repeatable
  // behaviour (so that tests are deterministic). This is used to simulate RNDR
  // and RNDRRS, as well as to simulate a source of entropy for architecturally
  // undefined behaviour.
  //
  // Parameters: multiplier 0x5DEECE66D, increment 0xB, modulus 2^48.
  std::linear_congruential_engine<uint64_t,
                                  0x5DEECE66D,
                                  0xB,
                                  static_cast<uint64_t>(1) << 48>
      rand_gen_;

  // A configurable size of SVE vector registers.
  unsigned vector_length_;

  // DC ZVA enable (= 0) status and block size.
  // Bit 4 is left clear and the low field holds 4.
  unsigned dczid_ = (0 << 4) | 4;  // 2^4 words => 64-byte block size.

  // Representation of memory attributes such as MTE tagging and BTI page
  // protection in addition to branch interceptions.
  MetaDataDepot meta_data_;

  // True if the debugger is enabled and might get entered.
  bool debugger_enabled_;

  // Debugger for the simulator. Owned by this Simulator.
  std::unique_ptr<Debugger> debugger_;

  // The Guarded Control Stack is represented using a vector, where the more
  // recently stored addresses are at higher-numbered indices.
  using GuardedControlStack = std::vector<uint64_t>;
5486 
5487   // The GCSManager handles the synchronisation of GCS across multiple
5488   // Simulator instances. Each Simulator has its own stack, but all share
5489   // a GCSManager instance. This allows exchanging stacks between Simulators
5490   // in a threaded application.
5491   class GCSManager {
5492    public:
5493     // Allocate a new Guarded Control Stack and add it to the vector of stacks.
5494     uint64_t AllocateStack() {
5495       const std::lock_guard<std::mutex> lock(stacks_mtx_);
5496 
5497       GuardedControlStack* new_stack = new GuardedControlStack;
5498       uint64_t result;
5499 
5500       // Put the new stack into the first available slot.
5501       for (result = 0; result < stacks_.size(); result++) {
5502         if (stacks_[result] == nullptr) {
5503           stacks_[result] = new_stack;
5504           break;
5505         }
5506       }
5507 
5508       // If there were no slots, create a new one.
5509       if (result == stacks_.size()) {
5510         stacks_.push_back(new_stack);
5511       }
5512 
5513       // Shift the index to look like a stack pointer aligned to a page.
5514       result <<= kPageSizeLog2;
5515 
5516       // Push the tagged index onto the new stack as a seal.
5517       new_stack->push_back(result + 1);
5518       return result;
5519     }
5520 
5521     // Free a Guarded Control Stack and set the stacks_ slot to null.
5522     void FreeStack(uint64_t gcs) {
5523       const std::lock_guard<std::mutex> lock(stacks_mtx_);
5524       uint64_t gcs_index = GetGCSIndex(gcs);
5525       GuardedControlStack* gcsptr = stacks_[gcs_index];
5526       if (gcsptr == nullptr) {
5527         VIXL_ABORT_WITH_MSG("Tried to free unallocated GCS ");
5528       } else {
5529         delete gcsptr;
5530         stacks_[gcs_index] = nullptr;
5531       }
5532     }
5533 
5534     // Get a pointer to the GCS vector using a GCS id.
5535     GuardedControlStack* GetGCSPtr(uint64_t gcs) const {
5536       return stacks_[GetGCSIndex(gcs)];
5537     }
5538 
5539    private:
5540     uint64_t GetGCSIndex(uint64_t gcs) const { return gcs >> 12; }
5541 
5542     std::vector<GuardedControlStack*> stacks_;
5543     std::mutex stacks_mtx_;
5544   };
5545 
  // A GCS id indicating no GCS has been allocated.
  static const uint64_t kGCSNoStack = kPageSize - 1;
  // GCS id of the stack currently used by this Simulator. Compared against
  // kGCSNoStack by IsAllocatedGCS(), and replaced by ActivateGCS().
  uint64_t gcs_;
  // When false, ReportGCSFailure() does nothing.
  bool gcs_enabled_;

 public:
  // Return the GCSManager. It is a function-local static, so it is created on
  // first use and the same instance is returned on every call.
  GCSManager& GetGCSManager() {
    static GCSManager manager;
    return manager;
  }

  // Enable, disable or query GCS failure reporting (see ReportGCSFailure()).
  void EnableGCSCheck() { gcs_enabled_ = true; }
  void DisableGCSCheck() { gcs_enabled_ = false; }
  bool IsGCSCheckEnabled() const { return gcs_enabled_; }

 private:
  // Return true unless `gcs` is the kGCSNoStack placeholder.
  bool IsAllocatedGCS(uint64_t gcs) const { return gcs != kGCSNoStack; }
5563   void ResetGCSState() {
5564     GCSManager& m = GetGCSManager();
5565     if (IsAllocatedGCS(gcs_)) {
5566       m.FreeStack(gcs_);
5567     }
5568     ActivateGCS(m.AllocateStack());
5569     GCSPop();  // Remove seal.
5570   }
5571 
5572   GuardedControlStack* GetGCSPtr(uint64_t gcs) {
5573     GCSManager& m = GetGCSManager();
5574     GuardedControlStack* result = m.GetGCSPtr(gcs);
5575     return result;
5576   }
  // Return the stack for this Simulator's current GCS id (`gcs_`).
  GuardedControlStack* GetActiveGCSPtr() { return GetGCSPtr(gcs_); }
5578 
5579   uint64_t ActivateGCS(uint64_t gcs) {
5580     uint64_t outgoing_gcs = gcs_;
5581     gcs_ = gcs;
5582     return outgoing_gcs;
5583   }
5584 
5585   void GCSPush(uint64_t addr) {
5586     GetActiveGCSPtr()->push_back(addr);
5587     size_t entry = GetActiveGCSPtr()->size() - 1;
5588     LogGCS(/* is_push = */ true, addr, entry);
5589   }
5590 
5591   uint64_t GCSPop() {
5592     GuardedControlStack* gcs = GetActiveGCSPtr();
5593     if (gcs->empty()) {
5594       return 0;
5595     }
5596     uint64_t return_addr = gcs->back();
5597     size_t entry = gcs->size() - 1;
5598     gcs->pop_back();
5599     LogGCS(/* is_push = */ false, return_addr, entry);
5600     return return_addr;
5601   }
5602 
5603   uint64_t GCSPeek() {
5604     GuardedControlStack* gcs = GetActiveGCSPtr();
5605     if (gcs->empty()) {
5606       return 0;
5607     }
5608     uint64_t return_addr = gcs->back();
5609     return return_addr;
5610   }
5611 
5612   void ReportGCSFailure(const char* msg) {
5613     if (IsGCSCheckEnabled()) {
5614       GuardedControlStack* gcs = GetActiveGCSPtr();
5615       printf("%s", msg);
5616       if (gcs == nullptr) {
5617         printf("GCS pointer is null\n");
5618       } else {
5619         printf("GCS records, most recent first:\n");
5620         int most_recent_index = static_cast<int>(gcs->size()) - 1;
5621         for (int i = 0; i < 8; i++) {
5622           if (!gcs->empty()) {
5623             uint64_t entry = gcs->back();
5624             gcs->pop_back();
5625             int index = most_recent_index - i;
5626             printf(" gcs%" PRIu64 "[%d]: 0x%016" PRIx64 "\n",
5627                    gcs_,
5628                    index,
5629                    entry);
5630           }
5631         }
5632         printf("End of GCS records.\n");
5633       }
5634       VIXL_ABORT_WITH_MSG("GCS failed ");
5635     }
5636   }
5637 };
5638 
// Only needed when runtime call simulation is supported and the code is being
// compiled as pre-C++14 (where `std::index_sequence` is unavailable).
#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
// Base case of the recursive template used to emulate C++14
// `std::index_sequence`.
//
// When the first template argument reaches 0, the accumulated pack `I...` is
// exposed by deriving from `emulated_index_sequence<I...>`.
template <size_t... I>
struct Simulator::emulated_make_index_sequence_helper<0, I...>
    : Simulator::emulated_index_sequence<I...> {};
#endif
5646 
5647 template <typename R, typename... P>
5648 void MetaDataDepot::BranchInterception<R, P...>::operator()(
5649     Simulator* simulator) const {
5650   if (callback_ == nullptr) {
5651     Simulator::RuntimeCallStructHelper<R, P...>::
5652         Wrapper(simulator, reinterpret_cast<uint64_t>(function_));
5653   } else {
5654     callback_(reinterpret_cast<uint64_t>(function_));
5655   }
5656 }
5657 
5658 }  // namespace aarch64
5659 }  // namespace vixl
5660 
5661 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
5662 
5663 #endif  // VIXL_AARCH64_SIMULATOR_AARCH64_H_
5664