1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifndef VIXL_AARCH64_SIMULATOR_AARCH64_H_
28 #define VIXL_AARCH64_SIMULATOR_AARCH64_H_
29
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <random>
#include <sstream>
#include <unordered_map>
#include <utility>
#include <vector>
35
36 #include "../cpu-features.h"
37 #include "../globals-vixl.h"
38 #include "../utils-vixl.h"
39
40 #include "abi-aarch64.h"
41 #include "cpu-features-auditor-aarch64.h"
42 #include "debugger-aarch64.h"
43 #include "disasm-aarch64.h"
44 #include "instructions-aarch64.h"
45 #include "simulator-constants-aarch64.h"
46
47 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
48
49 // These are only used for the ABI feature, and depend on checks performed for
50 // it.
51 #ifdef VIXL_HAS_ABI_SUPPORT
52 #include <tuple>
53 #if __cplusplus >= 201402L
54 // Required for `std::index_sequence`
55 #include <utility>
56 #endif
57 #endif
58
// The host that the Simulator is running on may not have these flags defined.
60 #ifndef PROT_BTI
61 #define PROT_BTI 0x10
62 #endif
63 #ifndef PROT_MTE
64 #define PROT_MTE 0x20
65 #endif
66
67 namespace vixl {
68 namespace aarch64 {
69
70 class Simulator;
71 struct RuntimeCallStructHelper;
72
73 enum class MemoryAccessResult { Success = 0, Failure = 1 };
74
75 // Try to access a piece of memory at the given address. Accessing that memory
76 // might raise a signal which, if handled by a custom signal handler, should
77 // setup the native and simulated context in order to continue. Return whether
78 // the memory access failed (i.e: raised a signal) or succeeded.
79 MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size);
80
81 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
82 // Access a byte of memory from the address at the given offset. If the memory
83 // could be accessed then return MemoryAccessResult::Success. If the memory
84 // could not be accessed, and therefore raised a signal, setup the simulated
85 // context and return MemoryAccessResult::Failure.
86 //
87 // If a signal is raised then it is expected that the signal handler will place
88 // MemoryAccessResult::Failure in the native return register and the address of
89 // _vixl_internal_AccessMemory_continue into the native instruction pointer.
90 extern "C" MemoryAccessResult _vixl_internal_ReadMemory(uintptr_t address,
91 uintptr_t offset);
92 extern "C" uintptr_t _vixl_internal_AccessMemory_continue();
93 #endif // VIXL_ENABLE_IMPLICIT_CHECKS
94
95 class SimStack {
96 public:
SimStack()97 SimStack() {}
SimStack(size_t size)98 explicit SimStack(size_t size) : usable_size_(size) {}
99
100 // Guard against accesses above the stack base. This could occur, for example,
101 // if the first simulated function tries to read stack arguments that haven't
102 // been properly initialised in the Simulator's stack.
SetBaseGuardSize(size_t size)103 void SetBaseGuardSize(size_t size) { base_guard_size_ = size; }
104
105 // Guard against stack overflows. The size should be large enough to detect
106 // the largest stride made (by `MacroAssembler::Claim()` or equivalent) whilst
107 // initialising stack objects.
SetLimitGuardSize(size_t size)108 void SetLimitGuardSize(size_t size) { limit_guard_size_ = size; }
109
110 // The minimum usable size of the stack.
111 // Equal to "stack base" - "stack limit", in AAPCS64 terminology.
SetUsableSize(size_t size)112 void SetUsableSize(size_t size) { usable_size_ = size; }
113
114 // Set the minimum alignment for the stack parameters.
AlignToBytesLog2(int align_log2)115 void AlignToBytesLog2(int align_log2) { align_log2_ = align_log2; }
116
117 class Allocated {
118 public:
119 // Using AAPCS64 terminology, highest addresses at the top:
120 //
121 // data_.get() + alloc_size ->
122 // |
123 // | Base guard
124 // GetBase() -> | |
125 // | |
126 // | | AAPCS64-legal
127 // | Usable stack | values of 'sp'.
128 // | |
129 // | |
130 // GetLimit() -> |
131 // | Limit guard
132 // data_.get() -> |
133 //
134 // The Simulator detects (and forbids) accesses to either guard region.
135
GetBase()136 char* GetBase() const { return base_; }
GetLimit()137 char* GetLimit() const { return limit_; }
138
139 template <typename T>
IsAccessInGuardRegion(const T * base,size_t size)140 bool IsAccessInGuardRegion(const T* base, size_t size) const {
141 VIXL_ASSERT(size > 0);
142 // Inclusive bounds.
143 const char* start = reinterpret_cast<const char*>(base);
144 const char* end = start + size - 1;
145 const char* data_start = data_.get();
146 const char* data_end = data_start + alloc_size_ - 1;
147 bool in_base_guard = (start <= data_end) && (end >= base_);
148 bool in_limit_guard = (start <= limit_) && (end >= data_start);
149 return in_base_guard || in_limit_guard;
150 }
151
152 private:
153 std::unique_ptr<char[]> data_;
154 char* limit_;
155 char* base_;
156 size_t alloc_size_;
157
158 friend class SimStack;
159 };
160
161 // Allocate the stack, locking the parameters.
Allocate()162 Allocated Allocate() {
163 size_t align_to = uint64_t{1} << align_log2_;
164 size_t l = AlignUp(limit_guard_size_, align_to);
165 size_t u = AlignUp(usable_size_, align_to);
166 size_t b = AlignUp(base_guard_size_, align_to);
167 size_t size = l + u + b;
168
169 Allocated a;
170 size_t alloc_size = (align_to - 1) + size;
171 a.data_ = std::make_unique<char[]>(alloc_size);
172 void* data = a.data_.get();
173 auto data_aligned =
174 reinterpret_cast<char*>(std::align(align_to, size, data, alloc_size));
175 a.limit_ = data_aligned + l - 1;
176 a.base_ = data_aligned + l + u;
177 a.alloc_size_ = alloc_size;
178 return a;
179 }
180
181 private:
182 size_t base_guard_size_ = 256;
183 size_t limit_guard_size_ = 4 * 1024;
184 size_t usable_size_ = 8 * 1024;
185 size_t align_log2_ = 4;
186
187 static const size_t kDefaultBaseGuardSize = 256;
188 static const size_t kDefaultLimitGuardSize = 4 * 1024;
189 static const size_t kDefaultUsableSize = 8 * 1024;
190 };
191
// Armv8.5 MTE helpers.

// Extract the four-bit MTE allocation tag held in address bits [59:56].
inline int GetAllocationTagFromAddress(uint64_t address) {
  // Equivalent to ExtractUnsignedBitfield64(59, 56, address): shift the field
  // down to bit 0 and mask to its four-bit width.
  return static_cast<int>((address >> 56) & 0xf);
}
196
197 template <typename T>
AddressUntag(T address)198 T AddressUntag(T address) {
199 // Cast the address using a C-style cast. A reinterpret_cast would be
200 // appropriate, but it can't cast one integral type to another.
201 uint64_t bits = (uint64_t)address;
202 return (T)(bits & ~kAddressTagMask);
203 }
204
// A callback function, invoked when an intercepted function is branched to
// (i.e. when a BranchInterception entry exists for it in
// branch_interceptions_). The address of the intercepted function is passed
// to the callback. For usage see BranchInterception.
209 using InterceptionCallback = std::function<void(uint64_t)>;
210
// Store for simulation metadata: MTE allocation tags (one entry per tag
// granule) and branch-interception entries.
class MetaDataDepot {
 public:
  // Metadata recorded for one MTE tag granule: its four-bit allocation tag.
  class MetaDataMTE {
   public:
    explicit MetaDataMTE(int tag) : tag_(tag) {}

    int GetTag() const { return tag_; }
    void SetTag(int tag) {
      VIXL_ASSERT(IsUint4(tag));
      tag_ = tag;
    }

    // Global (process-wide) switch controlling whether MTE tag checks are
    // performed.
    static bool IsActive() { return is_active; }
    static void SetActive(bool value) { is_active = value; }

   private:
    static bool is_active;
    // Only four bits are needed; int16_t keeps the map entry small.
    int16_t tag_;

    friend class MetaDataDepot;
  };

  // Generate a key for metadata recording from an untagged address: one key
  // per tag granule.
  template <typename T>
  uint64_t GenerateMTEkey(T address) const {
    // Cast the address using a C-style cast. A reinterpret_cast would be
    // appropriate, but it can't cast one integral type to another.
    return (uint64_t)(AddressUntag(address)) >> kMTETagGranuleInBytesLog2;
  }

  // Look up `key` in `map` (passed by pointer); return a pointer to the
  // mapped value, or nullptr if the key is absent.
  template <typename R, typename T>
  R GetAttribute(T map, uint64_t key) {
    auto pair = map->find(key);
    R value = (pair == map->end()) ? nullptr : &pair->second;
    return value;
  }

  // Return the allocation tag recorded for the granule containing `address`.
  // Aborts the simulation if no tag was ever recorded for that granule. `pc`
  // is only used to improve the diagnostic.
  template <typename T>
  int GetMTETag(T address, Instruction const* pc = nullptr) {
    uint64_t key = GenerateMTEkey(address);
    MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);

    if (!m) {
      std::stringstream sstream;
      sstream << std::hex << "MTE ERROR : instruction at 0x"
              << reinterpret_cast<uint64_t>(pc)
              << " touched a unallocated memory location 0x"
              << (uint64_t)(address) << ".\n";
      VIXL_ABORT_WITH_MSG(sstream.str().c_str());
    }

    return m->GetTag();
  }

  // Record `tag` for the granule containing `address` (which must be
  // granule-aligned). Re-assigning the tag a granule already has produces a
  // warning, since that usually indicates redundant re-tagging.
  template <typename T>
  void SetMTETag(T address, int tag, Instruction const* pc = nullptr) {
    VIXL_ASSERT(IsAligned((uintptr_t)address, kMTETagGranuleInBytes));
    uint64_t key = GenerateMTEkey(address);
    MetaDataMTE* m = GetAttribute<MetaDataMTE*>(&metadata_mte_, key);

    if (!m) {
      metadata_mte_.insert({key, MetaDataMTE(tag)});
    } else {
      // Overwrite
      if (m->GetTag() == tag) {
        std::stringstream sstream;
        sstream << std::hex << "MTE WARNING : instruction at 0x"
                << reinterpret_cast<uint64_t>(pc)
                << ", the same tag is assigned to the address 0x"
                << (uint64_t)(address) << ".\n";
        VIXL_WARNING(sstream.str().c_str());
      }
      m->SetTag(tag);
    }
  }

  // Remove the tag recorded for the granule containing `address`. Returns the
  // number of entries erased (0 or 1).
  template <typename T>
  size_t CleanMTETag(T address) {
    VIXL_ASSERT(
        IsAligned(reinterpret_cast<uintptr_t>(address), kMTETagGranuleInBytes));
    uint64_t key = GenerateMTEkey(address);
    return metadata_mte_.erase(key);
  }

  // Number of granules currently carrying a recorded tag.
  size_t GetTotalCountMTE() { return metadata_mte_.size(); }

  // A pure virtual struct that allows the templated BranchInterception struct
  // to be stored. For more information see BranchInterception.
  struct BranchInterceptionAbstract {
    virtual ~BranchInterceptionAbstract() {}
    // Call the callback_ if one exists, otherwise do a RuntimeCall.
    virtual void operator()(Simulator* simulator) const = 0;
  };

  // An entry denoting a function to intercept when branched to during
  // simulator execution. When a function is intercepted the callback will be
  // called if one exists otherwise the function will be passed to
  // RuntimeCall.
  template <typename R, typename... P>
  struct BranchInterception : public BranchInterceptionAbstract {
    BranchInterception(R (*function)(P...),
                       InterceptionCallback callback = nullptr)
        : function_(function), callback_(callback) {}

    // Defined out of line: invokes callback_ if set, otherwise RuntimeCalls
    // function_.
    void operator()(Simulator* simulator) const VIXL_OVERRIDE;

   private:
    // Pointer to the function that will be intercepted.
    R (*function_)(P...);

    // Function to be called instead of function_
    InterceptionCallback callback_;
  };

  // Register a new BranchInterception object. If 'function' is branched to
  // (e.g: "blr function") in the future; instead, if provided, 'callback' will
  // be called otherwise a runtime call will be performed on 'function'.
  //
  // For example: this can be used to always perform runtime calls on
  // non-AArch64 functions without using the macroassembler.
  //
  // Note: only unconditional branches to registers are currently supported to
  // be intercepted, e.g: "br"/"blr".
  //
  // TODO: support intercepting other branch types.
  template <typename R, typename... P>
  void RegisterBranchInterception(R (*function)(P...),
                                  InterceptionCallback callback = nullptr) {
    uintptr_t addr = reinterpret_cast<uintptr_t>(function);
    std::unique_ptr<BranchInterceptionAbstract> intercept =
        std::make_unique<BranchInterception<R, P...>>(function, callback);
    branch_interceptions_.insert(std::make_pair(addr, std::move(intercept)));
  }

  // Search for branch interceptions to the branch_target address; If one is
  // found return it otherwise return nullptr.
  BranchInterceptionAbstract* FindBranchInterception(uint64_t branch_target) {
    // Check for interceptions to the target address, if one is found, call it.
    auto search = branch_interceptions_.find(branch_target);
    if (search != branch_interceptions_.end()) {
      return search->second.get();
    } else {
      return nullptr;
    }
  }

  // Drop all registered branch interceptions. MTE tag records are kept.
  void ResetState() { branch_interceptions_.clear(); }

 private:
  // Tag recording of each allocated memory in the tag-granule.
  std::unordered_map<uint64_t, class MetaDataMTE> metadata_mte_;

  // Store a map of addresses to be intercepted and their corresponding branch
  // interception object, see 'BranchInterception'.
  std::unordered_map<uintptr_t, std::unique_ptr<BranchInterceptionAbstract>>
      branch_interceptions_;
};
368
369
// Representation of memory, with typed getters and setters for access.
class Memory {
 public:
  // Takes ownership of the allocated simulator stack. The metadata depot is
  // attached later, exactly once, via AppendMetaData().
  explicit Memory(SimStack::Allocated stack) : stack_(std::move(stack)) {
    metadata_depot_ = nullptr;
  }

  const SimStack::Allocated& GetStack() { return stack_; }

  // With MTE checking active, compare the tag carried in the pointer against
  // the tag recorded for the addressed granule. Always true when inactive.
  template <typename A>
  bool IsMTETagsMatched(A address, Instruction const* pc = nullptr) const {
    if (MetaDataDepot::MetaDataMTE::IsActive()) {
      // Cast the address using a C-style cast. A reinterpret_cast would be
      // appropriate, but it can't cast one integral type to another.
      uint64_t addr = (uint64_t)address;
      int pointer_tag = GetAllocationTagFromAddress(addr);
      int memory_tag = metadata_depot_->GetMTETag(AddressUntag(addr), pc);
      return pointer_tag == memory_tag;
    }
    return true;
  }

  // Read a T from `address`. Aborts on stack-guard hits and MTE tag
  // mismatches; returns std::nullopt if the host access raised a signal.
  template <typename T, typename A>
  std::optional<T> Read(A address, Instruction const* pc = nullptr) const {
    T value;
    VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
                       (sizeof(value) == 4) || (sizeof(value) == 8) ||
                       (sizeof(value) == 16));
    auto base = reinterpret_cast<const char*>(AddressUntag(address));
    if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
      VIXL_ABORT_WITH_MSG("Attempt to read from stack guard region");
    }
    if (!IsMTETagsMatched(address, pc)) {
      VIXL_ABORT_WITH_MSG("Tag mismatch.");
    }
    if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
        MemoryAccessResult::Failure) {
      return std::nullopt;
    }
    memcpy(&value, base, sizeof(value));
    return value;
  }

  // Write `value` to `address`. Aborts on stack-guard hits and MTE tag
  // mismatches; returns false if the host access raised a signal.
  template <typename T, typename A>
  bool Write(A address, T value, Instruction const* pc = nullptr) const {
    VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
                       (sizeof(value) == 4) || (sizeof(value) == 8) ||
                       (sizeof(value) == 16));
    auto base = reinterpret_cast<char*>(AddressUntag(address));
    if (stack_.IsAccessInGuardRegion(base, sizeof(value))) {
      VIXL_ABORT_WITH_MSG("Attempt to write to stack guard region");
    }
    if (!IsMTETagsMatched(address, pc)) {
      VIXL_ABORT_WITH_MSG("Tag mismatch.");
    }
    if (TryMemoryAccess(reinterpret_cast<uintptr_t>(base), sizeof(value)) ==
        MemoryAccessResult::Failure) {
      return false;
    }
    memcpy(base, &value, sizeof(value));
    return true;
  }

  // Size-dispatched unsigned read; `size_in_bytes` must be 1, 2, 4 or 8.
  template <typename A>
  std::optional<uint64_t> ReadUint(int size_in_bytes, A address) const {
    switch (size_in_bytes) {
      case 1:
        return Read<uint8_t>(address);
      case 2:
        return Read<uint16_t>(address);
      case 4:
        return Read<uint32_t>(address);
      case 8:
        return Read<uint64_t>(address);
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  // Size-dispatched signed (sign-extending) read; `size_in_bytes` must be
  // 1, 2, 4 or 8.
  template <typename A>
  std::optional<int64_t> ReadInt(int size_in_bytes, A address) const {
    switch (size_in_bytes) {
      case 1:
        return Read<int8_t>(address);
      case 2:
        return Read<int16_t>(address);
      case 4:
        return Read<int32_t>(address);
      case 8:
        return Read<int64_t>(address);
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  // Size-dispatched write of the low `size_in_bytes` bytes of `value`.
  template <typename A>
  bool Write(int size_in_bytes, A address, uint64_t value) const {
    switch (size_in_bytes) {
      case 1:
        return Write(address, static_cast<uint8_t>(value));
      case 2:
        return Write(address, static_cast<uint16_t>(value));
      case 4:
        return Write(address, static_cast<uint32_t>(value));
      case 8:
        return Write(address, value);
    }
    VIXL_UNREACHABLE();
    return false;
  }

  // Attach the metadata depot. May only be called once, with a non-null depot.
  void AppendMetaData(MetaDataDepot* metadata_depot) {
    VIXL_ASSERT(metadata_depot != nullptr);
    VIXL_ASSERT(metadata_depot_ == nullptr);
    metadata_depot_ = metadata_depot;
  }

 private:
  SimStack::Allocated stack_;
  MetaDataDepot* metadata_depot_;
};
491
// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15).
template <unsigned kMaxSizeInBits>
class SimRegisterBase {
 public:
  static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte;
  VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits);

  // Registers start zeroed, at their maximum size.
  SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); }

  // The current (dynamic) size of the register, which may be smaller than the
  // maximum (e.g. for a configured SVE vector length).
  unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; }
  unsigned GetSizeInBytes() const { return size_in_bytes_; }

  void SetSizeInBytes(unsigned size_in_bytes) {
    VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes);
    size_in_bytes_ = size_in_bytes;
  }
  void SetSizeInBits(unsigned size_in_bits) {
    VIXL_ASSERT(size_in_bits <= kMaxSizeInBits);
    VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
    SetSizeInBytes(size_in_bits / kBitsPerByte);
  }

  // Write the specified value. The value is zero-extended if necessary.
  template <typename T>
  void Write(T new_value) {
    // All AArch64 registers are zero-extending.
    if (sizeof(new_value) < GetSizeInBytes()) Clear();
    WriteLane(new_value, 0);
    NotifyRegisterWrite();
  }
  template <typename T>
  VIXL_DEPRECATED("Write", void Set(T new_value)) {
    Write(new_value);
  }

  // Zero the whole register, up to its maximum size.
  void Clear() {
    memset(value_, 0, kMaxSizeInBytes);
    NotifyRegisterWrite();
  }

  // Insert a typed value into a register, leaving the rest of the register
  // unchanged. The lane parameter indicates where in the register the value
  // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
  // 0 represents the least significant bits.
  template <typename T>
  void Insert(int lane, T new_value) {
    WriteLane(new_value, lane);
    NotifyRegisterWrite();
  }

  // Get the value as the specified type. The value is truncated if necessary.
  template <typename T>
  T Get() const {
    return GetLane<T>(0);
  }

  // Get the lane value as the specified type. The value is truncated if
  // necessary.
  template <typename T>
  T GetLane(int lane) const {
    T result;
    ReadLane(&result, lane);
    return result;
  }
  template <typename T>
  VIXL_DEPRECATED("GetLane", T Get(int lane) const) {
    return GetLane(lane);
  }

  // Get the value of a specific bit, indexed from the least-significant bit of
  // lane 0.
  bool GetBit(int bit) const {
    int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte);
    int byte = bit / (sizeof(value_[0]) * kBitsPerByte);
    return ((value_[byte] >> bit_in_byte) & 1) != 0;
  }

  // Return a pointer to the raw, underlying byte array.
  const uint8_t* GetBytes() const { return value_; }

  // TODO: Make this return a map of updated bytes, so that we can highlight
  // updated lanes for load-and-insert. (That never happens for scalar code, but
  // NEON has some instructions that can update individual lanes.)
  bool WrittenSinceLastLog() const { return written_since_last_log_; }

  void NotifyRegisterLogged() { written_since_last_log_ = false; }

 protected:
  // Raw backing storage, always kMaxSizeInBytes long regardless of the
  // dynamic size.
  uint8_t value_[kMaxSizeInBytes];

  unsigned size_in_bytes_;

  // Helpers to aid with register tracing.
  bool written_since_last_log_;

  void NotifyRegisterWrite() { written_since_last_log_ = true; }

 private:
  template <typename T>
  void ReadLane(T* dst, int lane) const {
    VIXL_ASSERT(lane >= 0);
    VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes());
    memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
  }

  template <typename T>
  void WriteLane(T src, int lane) {
    VIXL_ASSERT(lane >= 0);
    VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes());
    memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
  }

  // The default ReadLane and WriteLane methods assume what we are copying is
  // "trivially copyable" by using memcpy. We have to provide alternative
  // implementations for SimFloat16 which cannot be copied this way.

  void ReadLane(vixl::internal::SimFloat16* dst, int lane) const {
    uint16_t rawbits;
    ReadLane(&rawbits, lane);
    *dst = RawbitsToFloat16(rawbits);
  }

  void WriteLane(vixl::internal::SimFloat16 src, int lane) {
    WriteLane(Float16ToRawbits(src), lane);
  }
};
618
619 typedef SimRegisterBase<kXRegSize> SimRegister; // r0-r31
620 typedef SimRegisterBase<kPRegMaxSize> SimPRegister; // p0-p15
621 // FFR has the same format as a predicate register.
622 typedef SimPRegister SimFFRRegister;
623
624 // v0-v31 and z0-z31
625 class SimVRegister : public SimRegisterBase<kZRegMaxSize> {
626 public:
SimVRegister()627 SimVRegister() : SimRegisterBase<kZRegMaxSize>(), accessed_as_z_(false) {}
628
NotifyAccessAsZ()629 void NotifyAccessAsZ() { accessed_as_z_ = true; }
630
NotifyRegisterLogged()631 void NotifyRegisterLogged() {
632 SimRegisterBase<kZRegMaxSize>::NotifyRegisterLogged();
633 accessed_as_z_ = false;
634 }
635
AccessedAsZSinceLastLog()636 bool AccessedAsZSinceLastLog() const { return accessed_as_z_; }
637
638 private:
639 bool accessed_as_z_;
640 };
641
// Representation of a SVE predicate register.
class LogicPRegister {
 public:
  inline LogicPRegister(
      SimPRegister& other)  // NOLINT(runtime/references)(runtime/explicit)
      : register_(other) {}

  // Set a conveniently-sized block to 16 bits as the minimum predicate length
  // is 16 bits and allow to be increased to multiples of 16 bits.
  typedef uint16_t ChunkType;

  // Assign a bit into the end position of the specified lane.
  // The bit is zero-extended if necessary.
  void SetActive(VectorFormat vform, int lane_index, bool value) {
    // A predicate element occupies one bit per byte of vector lane width.
    int psize = LaneSizeInBytesFromFormat(vform);
    int bit_index = lane_index * psize;
    int byte_index = bit_index / kBitsPerByte;
    int bit_offset = bit_index % kBitsPerByte;
    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
    register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value));
  }

  bool IsActive(VectorFormat vform, int lane_index) const {
    int psize = LaneSizeInBytesFromFormat(vform);
    int bit_index = lane_index * psize;
    int byte_index = bit_index / kBitsPerByte;
    int bit_offset = bit_index % kBitsPerByte;
    uint8_t byte = register_.GetLane<uint8_t>(byte_index);
    // Only the first bit of the predicate element determines activity.
    return ExtractBit(byte, bit_offset);
  }

  // The accessors for bulk processing.
  int GetChunkCount() const {
    VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0);
    return register_.GetSizeInBytes() / sizeof(ChunkType);
  }

  ChunkType GetChunk(int lane) const { return GetActiveMask<ChunkType>(lane); }

  void SetChunk(int lane, ChunkType new_value) {
    SetActiveMask(lane, new_value);
  }

  // Set every predicate bit in the register, chunk by chunk.
  void SetAllBits() {
    int chunk_size = sizeof(ChunkType) * kBitsPerByte;
    ChunkType bits = static_cast<ChunkType>(GetUintMask(chunk_size));
    for (int lane = 0;
         lane < (static_cast<int>(register_.GetSizeInBits() / chunk_size));
         lane++) {
      SetChunk(lane, bits);
    }
  }

  template <typename T>
  T GetActiveMask(int lane) const {
    return register_.GetLane<T>(lane);
  }

  template <typename T>
  void SetActiveMask(int lane, T new_value) {
    register_.Insert<T>(lane, new_value);
  }

  void Clear() { register_.Clear(); }

  // Two LogicPRegisters alias if they wrap the same underlying register.
  bool Aliases(const LogicPRegister& other) const {
    return &register_ == &other.register_;
  }

 private:
  // The bit assignment is zero-extended to fill the size of predicate element.
  uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) {
    VIXL_ASSERT(index >= 0);
    VIXL_ASSERT(index + psize <= kBitsPerByte);
    int bits = value ? 1 : 0;
    switch (psize) {
      case 1:
        AssignBit(byte, index, bits);
        break;
      case 2:
        AssignBits(byte, index, 0x03, bits);
        break;
      case 4:
        AssignBits(byte, index, 0x0f, bits);
        break;
      case 8:
        AssignBits(byte, index, 0xff, bits);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return byte;
  }

  SimPRegister& register_;
};
739
740 using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
741
742 // Representation of a vector register, with typed getters and setters for lanes
743 // and additional information to represent lane state.
744 class LogicVRegister {
745 public:
LogicVRegister(SimVRegister & other)746 inline LogicVRegister(
747 SimVRegister& other) // NOLINT(runtime/references)(runtime/explicit)
748 : register_(other) {
749 for (size_t i = 0; i < ArrayLength(saturated_); i++) {
750 saturated_[i] = kNotSaturated;
751 }
752 for (size_t i = 0; i < ArrayLength(round_); i++) {
753 round_[i] = 0;
754 }
755 }
756
  // Read lane `index` as a signed value, sign-extended to 64 bits.
  int64_t Int(VectorFormat vform, int index) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    int64_t element;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        element = register_.GetLane<int8_t>(index);
        break;
      case 16:
        element = register_.GetLane<int16_t>(index);
        break;
      case 32:
        element = register_.GetLane<int32_t>(index);
        break;
      case 64:
        element = register_.GetLane<int64_t>(index);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return element;
  }
779
  // Read lane `index` as an unsigned value, zero-extended to 64 bits.
  uint64_t Uint(VectorFormat vform, int index) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    uint64_t element;
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        element = register_.GetLane<uint8_t>(index);
        break;
      case 16:
        element = register_.GetLane<uint16_t>(index);
        break;
      case 32:
        element = register_.GetLane<uint32_t>(index);
        break;
      case 64:
        element = register_.GetLane<uint64_t>(index);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    return element;
  }
802
UintArray(VectorFormat vform,uint64_t * dst)803 int UintArray(VectorFormat vform, uint64_t* dst) const {
804 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
805 dst[i] = Uint(vform, i);
806 }
807 return LaneCountFromFormat(vform);
808 }
809
  // Return the lane value shifted left so that its most-significant bit
  // becomes bit 63 of the result.
  uint64_t UintLeftJustified(VectorFormat vform, int index) const {
    return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
  }
813
  // As UintLeftJustified, but reinterpret the resulting bits as signed.
  int64_t IntLeftJustified(VectorFormat vform, int index) const {
    uint64_t value = UintLeftJustified(vform, index);
    int64_t result;
    // memcpy gives a bit-exact reinterpretation without relying on
    // implementation-defined integer conversion.
    memcpy(&result, &value, sizeof(result));
    return result;
  }
820
  // Store `value`, truncated to the lane width, into lane `index`.
  void SetInt(VectorFormat vform, int index, int64_t value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        register_.Insert(index, static_cast<int8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<int16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<int32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<int64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }
841
SetIntArray(VectorFormat vform,const int64_t * src)842 void SetIntArray(VectorFormat vform, const int64_t* src) const {
843 ClearForWrite(vform);
844 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
845 SetInt(vform, i, src[i]);
846 }
847 }
848
  // Store `value`, truncated to the lane width, into lane `index`.
  void SetUint(VectorFormat vform, int index, uint64_t value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        register_.Insert(index, static_cast<uint8_t>(value));
        break;
      case 16:
        register_.Insert(index, static_cast<uint16_t>(value));
        break;
      case 32:
        register_.Insert(index, static_cast<uint32_t>(value));
        break;
      case 64:
        register_.Insert(index, static_cast<uint64_t>(value));
        break;
      default:
        VIXL_UNREACHABLE();
        return;
    }
  }
869
  // Store a 128-bit value. For lane sizes of 64 bits or less, only the low
  // half (value.second) is used. For Q-sized lanes, the value is written as
  // two adjacent D-sized lanes: low half (value.second) first, then the high
  // half (value.first).
  void SetUint(VectorFormat vform, int index, vixl_uint128_t value) const {
    if (LaneSizeInBitsFromFormat(vform) <= 64) {
      SetUint(vform, index, value.second);
      return;
    }
    VIXL_ASSERT((vform == kFormat1Q) || (vform == kFormatVnQ));
    SetUint(kFormatVnD, 2 * index, value.second);
    SetUint(kFormatVnD, 2 * index + 1, value.first);
  }
879
SetUintArray(VectorFormat vform,const uint64_t * src)880 void SetUintArray(VectorFormat vform, const uint64_t* src) const {
881 ClearForWrite(vform);
882 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
883 SetUint(vform, i, src[i]);
884 }
885 }
886
  // Read the lane at `index` as the floating-point type T; the lane width is
  // determined by T.
  template <typename T>
  T Float(int index) const {
    return register_.GetLane<T>(index);
  }
891
  // Overwrite the lane at `index` with the floating-point `value`; the lane
  // width is determined by T.
  template <typename T>
  void SetFloat(int index, T value) const {
    register_.Insert(index, value);
  }
896
  // As above, but record an SVE (Z-sized) access when `vform` is an SVE
  // format.
  template <typename T>
  void SetFloat(VectorFormat vform, int index, T value) const {
    if (IsSVEFormat(vform)) register_.NotifyAccessAsZ();
    register_.Insert(index, value);
  }
902
  // Zero the whole backing register.
  void Clear() { register_.Clear(); }
904
905 // When setting a result in a register larger than the result itself, the top
906 // bits of the register must be cleared.
ClearForWrite(VectorFormat vform)907 void ClearForWrite(VectorFormat vform) const {
908 // SVE destinations write whole registers, so we have nothing to clear.
909 if (IsSVEFormat(vform)) return;
910
911 unsigned size = RegisterSizeInBytesFromFormat(vform);
912 for (unsigned i = size; i < register_.GetSizeInBytes(); i++) {
913 SetUint(kFormat16B, i, 0);
914 }
915 }
916
  // Saturation state for each lane of a vector. Signed and unsigned
  // saturation are tracked independently, with one bit per direction.
  enum Saturation {
    kNotSaturated = 0,
    // Signed saturation, towards the positive or negative limit.
    kSignedSatPositive = 1 << 0,
    kSignedSatNegative = 1 << 1,
    kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
    // Both direction bits set is not a meaningful state; treat it as
    // "undefined" and reject it in SetSatFlag().
    kSignedSatUndefined = kSignedSatMask,
    // Unsigned saturation, towards the maximum or zero.
    kUnsignedSatPositive = 1 << 2,
    kUnsignedSatNegative = 1 << 3,
    kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
    kUnsignedSatUndefined = kUnsignedSatMask
  };
929
930 // Getters for saturation state.
GetSignedSaturation(int index)931 Saturation GetSignedSaturation(int index) {
932 return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
933 }
934
GetUnsignedSaturation(int index)935 Saturation GetUnsignedSaturation(int index) {
936 return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
937 }
938
  // Setters for saturation state.
  // Reset lane `index` to the unsaturated state.
  void ClearSat(int index) { saturated_[index] = kNotSaturated; }
941
SetSignedSat(int index,bool positive)942 void SetSignedSat(int index, bool positive) {
943 SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
944 }
945
SetUnsignedSat(int index,bool positive)946 void SetUnsignedSat(int index, bool positive) {
947 SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
948 }
949
SetSatFlag(int index,Saturation sat)950 void SetSatFlag(int index, Saturation sat) {
951 saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
952 VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined);
953 VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined);
954 }
955
956 // Saturate lanes of a vector based on saturation state.
SignedSaturate(VectorFormat vform)957 LogicVRegister& SignedSaturate(VectorFormat vform) {
958 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
959 Saturation sat = GetSignedSaturation(i);
960 if (sat == kSignedSatPositive) {
961 SetInt(vform, i, MaxIntFromFormat(vform));
962 } else if (sat == kSignedSatNegative) {
963 SetInt(vform, i, MinIntFromFormat(vform));
964 }
965 }
966 return *this;
967 }
968
UnsignedSaturate(VectorFormat vform)969 LogicVRegister& UnsignedSaturate(VectorFormat vform) {
970 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
971 Saturation sat = GetUnsignedSaturation(i);
972 if (sat == kUnsignedSatPositive) {
973 SetUint(vform, i, MaxUintFromFormat(vform));
974 } else if (sat == kUnsignedSatNegative) {
975 SetUint(vform, i, 0);
976 }
977 }
978 return *this;
979 }
980
  // Getter for rounding state.
  // Return true if lane `index` needs a rounding increment (see Round()).
  bool GetRounding(int index) { return round_[index]; }
983
  // Setter for rounding state.
  void SetRounding(int index, bool round) { round_[index] = round; }
986
987 // Round lanes of a vector based on rounding state.
Round(VectorFormat vform)988 LogicVRegister& Round(VectorFormat vform) {
989 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
990 SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0));
991 }
992 return *this;
993 }
994
995 // Unsigned halve lanes of a vector, and use the saturation state to set the
996 // top bit.
Uhalve(VectorFormat vform)997 LogicVRegister& Uhalve(VectorFormat vform) {
998 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
999 uint64_t val = Uint(vform, i);
1000 SetRounding(i, (val & 1) == 1);
1001 val >>= 1;
1002 if (GetUnsignedSaturation(i) != kNotSaturated) {
1003 // If the operation causes unsigned saturation, the bit shifted into the
1004 // most significant bit must be set.
1005 val |= (MaxUintFromFormat(vform) >> 1) + 1;
1006 }
1007 SetInt(vform, i, val);
1008 }
1009 return *this;
1010 }
1011
1012 // Signed halve lanes of a vector, and use the carry state to set the top bit.
Halve(VectorFormat vform)1013 LogicVRegister& Halve(VectorFormat vform) {
1014 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1015 int64_t val = Int(vform, i);
1016 SetRounding(i, (val & 1) == 1);
1017 val = ExtractSignedBitfield64(63, 1, val); // >>= 1
1018 if (GetSignedSaturation(i) == kNotSaturated) {
1019 SetInt(vform, i, val);
1020 } else {
1021 // If the operation causes signed saturation, the sign bit must be
1022 // inverted.
1023 uint64_t uval = static_cast<uint64_t>(val);
1024 SetUint(vform, i, uval ^ ((MaxUintFromFormat(vform) >> 1) + 1));
1025 }
1026 }
1027 return *this;
1028 }
1029
LaneCountFromFormat(VectorFormat vform)1030 int LaneCountFromFormat(VectorFormat vform) const {
1031 if (IsSVEFormat(vform)) {
1032 return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform);
1033 } else {
1034 return vixl::aarch64::LaneCountFromFormat(vform);
1035 }
1036 }
1037
 private:
  // The backing storage. This is a reference, so LogicVRegister is a
  // non-owning view over a register held elsewhere.
  SimVRegister& register_;

  // Allocate one saturation state entry per lane; largest register is type Q,
  // and lanes can be a minimum of one byte wide.
  Saturation saturated_[kZRegMaxSizeInBytes];

  // Allocate one rounding state entry per lane.
  bool round_[kZRegMaxSizeInBytes];
};
1048
// Represent an SVE addressing mode and abstract per-lane address generation to
// make iteration easy.
//
// Contiguous accesses are described with a simple base address, the memory
// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in
// each struct (`SetRegCount()`).
//
// Scatter-gather accesses also require a SimVRegister and information about how
// to extract lanes from it.
class LogicSVEAddressVector {
 public:
  // scalar-plus-scalar
  // scalar-plus-immediate
  explicit LogicSVEAddressVector(uint64_t base)
      : base_(base),
        msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
        reg_count_(1),
        vector_(NULL),
        vector_form_(kFormatUndefined),
        vector_mod_(NO_SVE_OFFSET_MODIFIER),
        vector_shift_(0) {}

  // scalar-plus-vector
  // vector-plus-immediate
  //    `base` should be the constant used for each element. That is, the value
  //    of `xn`, or `#<imm>`.
  //    `vector` should be the SimVRegister with offsets for each element. The
  //    vector format must be specified; SVE scatter/gather accesses typically
  //    support both 32-bit and 64-bit addressing.
  //
  //    `mod` and `shift` correspond to the modifiers applied to each element in
  //    scalar-plus-vector forms, such as those used for unpacking and
  //    sign-extension. They are not used for vector-plus-immediate.
  LogicSVEAddressVector(uint64_t base,
                        const SimVRegister* vector,
                        VectorFormat vform,
                        SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER,
                        int shift = 0)
      : base_(base),
        msize_in_bytes_log2_(kUnknownMsizeInBytesLog2),
        reg_count_(1),
        vector_(vector),
        vector_form_(vform),
        vector_mod_(mod),
        vector_shift_(shift) {}

  // Set `msize` -- the memory occupied by each lane -- for address
  // calculations.
  void SetMsizeInBytesLog2(int msize_in_bytes_log2) {
    VIXL_ASSERT(msize_in_bytes_log2 >= static_cast<int>(kBRegSizeInBytesLog2));
    VIXL_ASSERT(msize_in_bytes_log2 <= static_cast<int>(kDRegSizeInBytesLog2));
    msize_in_bytes_log2_ = msize_in_bytes_log2;
  }

  // Return true once SetMsizeInBytesLog2() has been called; the msize getters
  // below assert this.
  bool HasMsize() const {
    return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2;
  }

  int GetMsizeInBytesLog2() const {
    VIXL_ASSERT(HasMsize());
    return msize_in_bytes_log2_;
  }
  int GetMsizeInBitsLog2() const {
    return GetMsizeInBytesLog2() + kBitsPerByteLog2;
  }

  int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); }
  int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); }

  // Set the number of registers in each struct (e.g. 1 for ld1, 4 for ld4).
  void SetRegCount(int reg_count) {
    VIXL_ASSERT(reg_count >= 1);  // E.g. ld1/st1
    VIXL_ASSERT(reg_count <= 4);  // E.g. ld4/st4
    reg_count_ = reg_count;
  }

  int GetRegCount() const { return reg_count_; }

  // Full per-element address calculation for structured accesses.
  //
  // Note that the register number argument (`reg`) is zero-based.
  uint64_t GetElementAddress(int lane, int reg) const {
    VIXL_ASSERT(reg < GetRegCount());
    // Individual structures are always contiguous in memory, so this
    // implementation works for both contiguous and scatter-gather addressing.
    return GetStructAddress(lane) + (reg * GetMsizeInBytes());
  }

  // Full per-struct address calculation for structured accesses.
  // (Defined out of line; it reads the private members below.)
  uint64_t GetStructAddress(int lane) const;

  bool IsContiguous() const { return vector_ == NULL; }
  bool IsScatterGather() const { return !IsContiguous(); }

 private:
  uint64_t base_;
  int msize_in_bytes_log2_;
  int reg_count_;

  // Scatter-gather state: NULL `vector_` means a contiguous access.
  const SimVRegister* vector_;
  VectorFormat vector_form_;
  SVEOffsetModifier vector_mod_;
  int vector_shift_;

  // Sentinel for "SetMsizeInBytesLog2() not yet called".
  static const int kUnknownMsizeInBytesLog2 = -1;
};
1154
1155 // The proper way to initialize a simulated system register (such as NZCV) is as
1156 // follows:
1157 // SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV);
1158 class SimSystemRegister {
1159 public:
1160 // The default constructor represents a register which has no writable bits.
1161 // It is not possible to set its value to anything other than 0.
SimSystemRegister()1162 SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) {}
1163
GetRawValue()1164 uint32_t GetRawValue() const { return value_; }
1165 VIXL_DEPRECATED("GetRawValue", uint32_t RawValue() const) {
1166 return GetRawValue();
1167 }
1168
SetRawValue(uint32_t new_value)1169 void SetRawValue(uint32_t new_value) {
1170 value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_);
1171 }
1172
ExtractBits(int msb,int lsb)1173 uint32_t ExtractBits(int msb, int lsb) const {
1174 return ExtractUnsignedBitfield32(msb, lsb, value_);
1175 }
1176 VIXL_DEPRECATED("ExtractBits", uint32_t Bits(int msb, int lsb) const) {
1177 return ExtractBits(msb, lsb);
1178 }
1179
ExtractSignedBits(int msb,int lsb)1180 int32_t ExtractSignedBits(int msb, int lsb) const {
1181 return ExtractSignedBitfield32(msb, lsb, value_);
1182 }
1183 VIXL_DEPRECATED("ExtractSignedBits",
1184 int32_t SignedBits(int msb, int lsb) const) {
1185 return ExtractSignedBits(msb, lsb);
1186 }
1187
1188 void SetBits(int msb, int lsb, uint32_t bits);
1189
1190 // Default system register values.
1191 static SimSystemRegister DefaultValueFor(SystemRegister id);
1192
1193 #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \
1194 uint32_t Get##Name() const { return this->Func(HighBit, LowBit); } \
1195 VIXL_DEPRECATED("Get" #Name, uint32_t Name() const) { return Get##Name(); } \
1196 void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); }
1197 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \
1198 static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask);
1199
SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER,DEFINE_WRITE_IGNORE_MASK)1200 SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK)
1201
1202 #undef DEFINE_ZERO_BITS
1203 #undef DEFINE_GETTER
1204
1205 protected:
1206 // Most system registers only implement a few of the bits in the word. Other
1207 // bits are "read-as-zero, write-ignored". The write_ignore_mask argument
1208 // describes the bits which are not modifiable.
1209 SimSystemRegister(uint32_t value, uint32_t write_ignore_mask)
1210 : value_(value), write_ignore_mask_(write_ignore_mask) {}
1211
1212 uint32_t value_;
1213 uint32_t write_ignore_mask_;
1214 };
1215
1216
1217 class SimExclusiveLocalMonitor {
1218 public:
SimExclusiveLocalMonitor()1219 SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) {
1220 Clear();
1221 }
1222
1223 // Clear the exclusive monitor (like clrex).
Clear()1224 void Clear() {
1225 address_ = 0;
1226 size_ = 0;
1227 }
1228
1229 // Clear the exclusive monitor most of the time.
MaybeClear()1230 void MaybeClear() {
1231 if ((seed_ % kSkipClearProbability) != 0) {
1232 Clear();
1233 }
1234
1235 // Advance seed_ using a simple linear congruential generator.
1236 seed_ = (seed_ * 48271) % 2147483647;
1237 }
1238
1239 // Mark the address range for exclusive access (like load-exclusive).
MarkExclusive(uint64_t address,size_t size)1240 void MarkExclusive(uint64_t address, size_t size) {
1241 address_ = address;
1242 size_ = size;
1243 }
1244
1245 // Return true if the address range is marked (like store-exclusive).
1246 // This helper doesn't implicitly clear the monitor.
IsExclusive(uint64_t address,size_t size)1247 bool IsExclusive(uint64_t address, size_t size) {
1248 VIXL_ASSERT(size > 0);
1249 // Be pedantic: Require both the address and the size to match.
1250 return (size == size_) && (address == address_);
1251 }
1252
1253 private:
1254 uint64_t address_;
1255 size_t size_;
1256
1257 const int kSkipClearProbability;
1258 uint32_t seed_;
1259 };
1260
1261
1262 // We can't accurate simulate the global monitor since it depends on external
1263 // influences. Instead, this implementation occasionally causes accesses to
1264 // fail, according to kPassProbability.
1265 class SimExclusiveGlobalMonitor {
1266 public:
SimExclusiveGlobalMonitor()1267 SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {}
1268
IsExclusive(uint64_t address,size_t size)1269 bool IsExclusive(uint64_t address, size_t size) {
1270 USE(address, size);
1271
1272 bool pass = (seed_ % kPassProbability) != 0;
1273 // Advance seed_ using a simple linear congruential generator.
1274 seed_ = (seed_ * 48271) % 2147483647;
1275 return pass;
1276 }
1277
1278 private:
1279 const int kPassProbability;
1280 uint32_t seed_;
1281 };
1282
1283 class Debugger;
1284
1285 template <uint32_t mode>
1286 uint64_t CryptoOp(uint64_t x, uint64_t y, uint64_t z);
1287
1288 class Simulator : public DecoderVisitor {
1289 public:
1290 explicit Simulator(Decoder* decoder,
1291 FILE* stream = stdout,
1292 SimStack::Allocated stack = SimStack().Allocate());
1293 ~Simulator();
1294
1295 void ResetState();
1296
1297 // Run the simulator.
1298 virtual void Run();
1299 void RunFrom(const Instruction* first);
1300
1301
#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
    (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
  // Templated `RunFrom` version taking care of passing arguments and returning
  // the result value.
  // This allows code like:
  //     int32_t res = simulator.RunFrom<int32_t, int32_t>(GenerateCode(),
  //                                                       0x123);
  // It requires VIXL's ABI features, and C++11 or greater.
  // Also, the initialisation of tuples is incorrect in GCC before 4.9.1:
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
  template <typename R, typename... P>
  R RunFrom(const Instruction* code, P... arguments) {
    return RunFromStructHelper<R, P...>::Wrapper(this, code, arguments...);
  }

  // Helper struct so that a `void` return type can be handled by partial
  // specialization (function templates cannot be partially specialized).
  template <typename R, typename... P>
  struct RunFromStructHelper {
    static R Wrapper(Simulator* simulator,
                     const Instruction* code,
                     P... arguments) {
      ABI abi;
      // The tuple's braced-init-list guarantees left-to-right evaluation, so
      // each argument is written to its ABI-assigned register in order. The
      // comma operator makes each element `arguments` itself; the tuple's
      // value is otherwise unused.
      std::tuple<P...> unused_tuple{
          // TODO: We currently do not support arguments passed on the stack. We
          // could do so by using `WriteGenericOperand()` here, but may need to
          // add features to handle situations where the stack is or is not set
          // up.
          (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
                                           .GetCPURegister(),
                                       arguments),
           arguments)...};
      simulator->RunFrom(code);
      return simulator->ReadGenericOperand<R>(abi.GetReturnGenericOperand<R>());
    }
  };

  // Partial specialization when the return type is `void`.
  template <typename... P>
  struct RunFromStructHelper<void, P...> {
    static void Wrapper(Simulator* simulator,
                        const Instruction* code,
                        P... arguments) {
      ABI abi;
      std::tuple<P...> unused_tuple{
          // TODO: We currently do not support arguments passed on the stack. We
          // could do so by using `WriteGenericOperand()` here, but may need to
          // add features to handle situations where the stack is or is not set
          // up.
          (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand<P>()
                                           .GetCPURegister(),
                                       arguments),
           arguments)...};
      simulator->RunFrom(code);
    }
  };
#endif
1357
  // Execution ends when the PC hits this address.
  static const Instruction* kEndOfSimAddress;

  // Simulation helpers.
  bool IsSimulationFinished() const { return pc_ == kEndOfSimAddress; }

  const Instruction* ReadPc() const { return pc_; }
  VIXL_DEPRECATED("ReadPc", const Instruction* pc() const) { return ReadPc(); }

  enum BranchLogMode { LogBranches, NoBranchLog };

  // Update the PC. The write is logged as a taken branch unless `log_mode`
  // says otherwise, and the new PC has its tag bits removed before use.
  // Setting `pc_modified_` stops ExecuteInstruction()/IncrementPc() from
  // also advancing to the next sequential instruction.
  void WritePc(const Instruction* new_pc,
               BranchLogMode log_mode = LogBranches) {
    if (log_mode == LogBranches) LogTakenBranch(new_pc);
    pc_ = AddressUntag(new_pc);
    pc_modified_ = true;
  }
  VIXL_DEPRECATED("WritePc", void set_pc(const Instruction* new_pc)) {
    return WritePc(new_pc);
  }

  // Advance to the next sequential instruction, unless this instruction
  // already wrote the PC (e.g. a taken branch).
  void IncrementPc() {
    if (!pc_modified_) {
      pc_ = pc_->GetNextInstruction();
    }
  }
  VIXL_DEPRECATED("IncrementPc", void increment_pc()) { IncrementPc(); }

  // Branch target identification (BTI) state. `next_btype_` holds the state
  // that will apply to the next executed instruction.
  BType ReadBType() const { return btype_; }
  void WriteNextBType(BType btype) { next_btype_ = btype; }
  void UpdateBType() {
    btype_ = next_btype_;
    next_btype_ = DefaultBType;
  }

  // Helper function to determine BType for branches.
  BType GetBTypeFromInstruction(const Instruction* instr) const;

  bool PcIsInGuardedPage() const { return guard_pages_; }
  void SetGuardedPages(bool guard_pages) { guard_pages_ = guard_pages; }

  const Instruction* GetLastExecutedInstruction() const { return last_instr_; }
1400
  // Decode and execute the instruction at the current PC, then advance the
  // PC and per-instruction state (BType, logging, feature checks).
  void ExecuteInstruction() {
    // The program counter should always be aligned.
    VIXL_ASSERT(IsWordAligned(pc_));
    pc_modified_ = false;

    // On guarded pages, if BType is not zero, take an exception on any
    // instruction other than BTI, PACI[AB]SP, HLT or BRK.
    if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
      if (pc_->IsPAuth()) {
        Instr i = pc_->Mask(SystemPAuthMask);
        if ((i != PACIASP) && (i != PACIBSP)) {
          VIXL_ABORT_WITH_MSG(
              "Executing non-BTI instruction with wrong BType.");
        }
      } else if (!pc_->IsBti() && !pc_->IsException()) {
        VIXL_ABORT_WITH_MSG("Executing non-BTI instruction with wrong BType.");
      }
    }

    // Capture this before decoding: `form_hash_` refers to the previous
    // instruction here, and is updated for the current one by Decode() below.
    bool last_instr_was_movprfx =
        (form_hash_ == "movprfx_z_z"_h) || (form_hash_ == "movprfx_z_p_z"_h);

    // decoder_->Decode(...) triggers at least the following visitors:
    //  1. The CPUFeaturesAuditor (`cpu_features_auditor_`).
    //  2. The PrintDisassembler (`print_disasm_`), if enabled.
    //  3. The Simulator (`this`).
    // User can add additional visitors at any point, but the Simulator requires
    // that the ordering above is preserved.
    decoder_->Decode(pc_);

    // If the previous instruction was movprfx, check that the current
    // instruction is one that movprfx is allowed to prefix.
    if (last_instr_was_movprfx) {
      VIXL_ASSERT(last_instr_ != NULL);
      VIXL_CHECK(pc_->CanTakeSVEMovprfx(form_hash_, last_instr_));
    }

    last_instr_ = ReadPc();
    IncrementPc();
    LogAllWrittenRegisters();
    UpdateBType();

    VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable());
  }
1443
1444 virtual void Visit(Metadata* metadata,
1445 const Instruction* instr) VIXL_OVERRIDE;
1446
1447 #define DECLARE(A) virtual void Visit##A(const Instruction* instr);
1448 VISITOR_LIST_THAT_RETURN(DECLARE)
1449 #undef DECLARE
1450 #define DECLARE(A) \
1451 VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr);
1452 VISITOR_LIST_THAT_DONT_RETURN(DECLARE)
1453 #undef DECLARE
1454
1455 void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr);
1456 void Simulate_PdT_Xn_Xm(const Instruction* instr);
1457 void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr);
1458 void Simulate_ZdB_ZnB_ZmB(const Instruction* instr);
1459 void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr);
1460 void Simulate_ZdH_PgM_ZnS(const Instruction* instr);
1461 void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr);
1462 void Simulate_ZdS_PgM_ZnD(const Instruction* instr);
1463 void Simulate_ZdS_PgM_ZnS(const Instruction* instr);
1464 void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr);
1465 void Simulate_ZdT_PgM_ZnT(const Instruction* instr);
1466 void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr);
1467 void Simulate_ZdT_ZnT_ZmT(const Instruction* instr);
1468 void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr);
1469 void Simulate_ZdT_ZnT_const(const Instruction* instr);
1470 void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr);
1471 void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr);
1472 void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr);
1473 void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr);
1474 void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr);
1475 void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr);
1476 void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr);
1477 void Simulate_ZdaT_ZnT_const(const Instruction* instr);
1478 void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr);
1479 void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr);
1480 void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr);
1481 void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr);
1482 void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr);
1483 void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr);
1484 void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr);
1485 void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr);
1486
1487 void SimulateSVEHalvingAddSub(const Instruction* instr);
1488 void SimulateSVESaturatingArithmetic(const Instruction* instr);
1489 void SimulateSVEIntArithPair(const Instruction* instr);
1490 void SimulateSVENarrow(const Instruction* instr);
1491 void SimulateSVEInterleavedArithLong(const Instruction* instr);
1492 void SimulateSVEShiftLeftImm(const Instruction* instr);
1493 void SimulateSVEAddSubCarry(const Instruction* instr);
1494 void SimulateSVEAddSubHigh(const Instruction* instr);
1495 void SimulateSVEIntMulLongVec(const Instruction* instr);
1496 void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr);
1497 void SimulateSVEExclusiveOrRotate(const Instruction* instr);
1498 void SimulateSVEBitwiseTernary(const Instruction* instr);
1499 void SimulateSVEComplexDotProduct(const Instruction* instr);
1500 void SimulateSVEMulIndex(const Instruction* instr);
1501 void SimulateSVEMlaMlsIndex(const Instruction* instr);
1502 void SimulateSVEComplexIntMulAdd(const Instruction* instr);
1503 void SimulateSVESaturatingMulAddHigh(const Instruction* instr);
1504 void SimulateSVESaturatingMulHighIndex(const Instruction* instr);
1505 void SimulateSVEFPConvertLong(const Instruction* instr);
1506 void SimulateSVEPmull128(const Instruction* instr);
1507 void SimulateMatrixMul(const Instruction* instr);
1508 void SimulateSVEFPMatrixMul(const Instruction* instr);
1509 void SimulateNEONMulByElementLong(const Instruction* instr);
1510 void SimulateNEONFPMulByElement(const Instruction* instr);
1511 void SimulateNEONFPMulByElementLong(const Instruction* instr);
1512 void SimulateNEONComplexMulByElement(const Instruction* instr);
1513 void SimulateNEONDotProdByElement(const Instruction* instr);
1514 void SimulateNEONSHA3(const Instruction* instr);
1515 void SimulateMTEAddSubTag(const Instruction* instr);
1516 void SimulateMTETagMaskInsert(const Instruction* instr);
1517 void SimulateMTESubPointer(const Instruction* instr);
1518 void SimulateMTELoadTag(const Instruction* instr);
1519 void SimulateMTEStoreTag(const Instruction* instr);
1520 void SimulateMTEStoreTagPair(const Instruction* instr);
1521 void Simulate_XdSP_XnSP_Xm(const Instruction* instr);
1522 void SimulateCpy(const Instruction* instr);
1523 void SimulateCpyFP(const Instruction* instr);
1524 void SimulateCpyP(const Instruction* instr);
1525 void SimulateCpyM(const Instruction* instr);
1526 void SimulateCpyE(const Instruction* instr);
1527 void SimulateSetP(const Instruction* instr);
1528 void SimulateSetM(const Instruction* instr);
1529 void SimulateSetE(const Instruction* instr);
1530 void SimulateSetGP(const Instruction* instr);
1531 void SimulateSetGM(const Instruction* instr);
1532 void SimulateSignedMinMax(const Instruction* instr);
1533 void SimulateUnsignedMinMax(const Instruction* instr);
1534 void SimulateSHA512(const Instruction* instr);
1535
1536 void VisitCryptoSM3(const Instruction* instr);
1537 void VisitCryptoSM4(const Instruction* instr);
1538
  // Integer register accessors.

  // Basic accessor: Read the register as the specified type.
  template <typename T>
  T ReadRegister(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
    VIXL_ASSERT(
        code < kNumberOfRegisters ||
        ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));
    if ((code == 31) && (r31mode == Reg31IsZeroRegister)) {
      // Reads of xzr/wzr return zero for any type T (memset keeps this
      // correct for non-integral T as well).
      T result;
      memset(&result, 0, sizeof(result));
      return result;
    }
    if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) {
      code = 31;
    }
    return registers_[code].Get<T>();
  }
  template <typename T>
  VIXL_DEPRECATED("ReadRegister",
                  T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister)
                      const) {
    return ReadRegister<T>(code, r31mode);
  }
1563
  // Common specialized accessors for the ReadRegister() template.
  int32_t ReadWRegister(unsigned code,
                        Reg31Mode r31mode = Reg31IsZeroRegister) const {
    return ReadRegister<int32_t>(code, r31mode);
  }
  VIXL_DEPRECATED("ReadWRegister",
                  int32_t wreg(unsigned code,
                               Reg31Mode r31mode = Reg31IsZeroRegister) const) {
    return ReadWRegister(code, r31mode);
  }

  int64_t ReadXRegister(unsigned code,
                        Reg31Mode r31mode = Reg31IsZeroRegister) const {
    return ReadRegister<int64_t>(code, r31mode);
  }
  VIXL_DEPRECATED("ReadXRegister",
                  int64_t xreg(unsigned code,
                               Reg31Mode r31mode = Reg31IsZeroRegister) const) {
    return ReadXRegister(code, r31mode);
  }

  // SVE predicate register accessor.
  SimPRegister& ReadPRegister(unsigned code) {
    VIXL_ASSERT(code < kNumberOfPRegisters);
    return pregisters_[code];
  }

  // SVE first-fault register accessor.
  SimFFRRegister& ReadFFR() { return ffr_register_; }

  // As above, with parameterized size and return type. The value is
  // either zero-extended or truncated to fit, as required.
  template <typename T>
  T ReadRegister(unsigned size,
                 unsigned code,
                 Reg31Mode r31mode = Reg31IsZeroRegister) const {
    uint64_t raw;
    switch (size) {
      case kWRegSize:
        raw = ReadRegister<uint32_t>(code, r31mode);
        break;
      case kXRegSize:
        raw = ReadRegister<uint64_t>(code, r31mode);
        break;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }

    T result;
    VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
    // Copy the result and truncate to fit. This assumes a little-endian host.
    memcpy(&result, &raw, sizeof(result));
    return result;
  }
  template <typename T>
  VIXL_DEPRECATED("ReadRegister",
                  T reg(unsigned size,
                        unsigned code,
                        Reg31Mode r31mode = Reg31IsZeroRegister) const) {
    return ReadRegister<T>(size, code, r31mode);
  }

  // Use int64_t by default if T is not specified.
  int64_t ReadRegister(unsigned size,
                       unsigned code,
                       Reg31Mode r31mode = Reg31IsZeroRegister) const {
    return ReadRegister<int64_t>(size, code, r31mode);
  }
  VIXL_DEPRECATED("ReadRegister",
                  int64_t reg(unsigned size,
                              unsigned code,
                              Reg31Mode r31mode = Reg31IsZeroRegister) const) {
    return ReadRegister(size, code, r31mode);
  }
1637
  enum RegLogMode { LogRegWrites, NoRegLog };

  // Write 'value' into an integer register. The value is zero-extended. This
  // behaviour matches AArch64 register writes.
  //
  // SP may be specified in one of two ways:
  //  - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister)
  //  - (code == 31) && (r31mode == Reg31IsStackPointer)
  template <typename T>
  void WriteRegister(unsigned code,
                     T value,
                     RegLogMode log_mode = LogRegWrites,
                     Reg31Mode r31mode = Reg31IsZeroRegister) {
    if (sizeof(T) < kWRegSizeInBytes) {
      // Values narrower than a W register are widened to 32 bits and
      // re-dispatched, so the zero-extension below applies uniformly.
      // We use a C-style cast on purpose here.
      // Since we do not have access to 'constepxr if', the casts in this `if`
      // must be valid even if we know the code will never be executed, in
      // particular when `T` is a pointer type.
      int64_t tmp_64bit = (int64_t)value;
      int32_t tmp_32bit = static_cast<int32_t>(tmp_64bit);
      WriteRegister<int32_t>(code, tmp_32bit, log_mode, r31mode);
      return;
    }

    VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) ||
                (sizeof(T) == kXRegSizeInBytes));
    VIXL_ASSERT(
        (code < kNumberOfRegisters) ||
        ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)));

    if (code == 31) {
      if (r31mode == Reg31IsZeroRegister) {
        // Discard writes to the zero register.
        return;
      } else {
        code = kSPRegInternalCode;
      }
    }

    // registers_[31] is the stack pointer.
    VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
    registers_[code % kNumberOfRegisters].Write(value);

    if (log_mode == LogRegWrites) {
      LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T)));
    }
  }
  template <typename T>
  VIXL_DEPRECATED("WriteRegister",
                  void set_reg(unsigned code,
                               T value,
                               RegLogMode log_mode = LogRegWrites,
                               Reg31Mode r31mode = Reg31IsZeroRegister)) {
    WriteRegister<T>(code, value, log_mode, r31mode);
  }
1693
1694 // Common specialized accessors for the set_reg() template.
1695 void WriteWRegister(unsigned code,
1696 int32_t value,
1697 RegLogMode log_mode = LogRegWrites,
1698 Reg31Mode r31mode = Reg31IsZeroRegister) {
1699 WriteRegister(code, value, log_mode, r31mode);
1700 }
1701 VIXL_DEPRECATED("WriteWRegister",
1702 void set_wreg(unsigned code,
1703 int32_t value,
1704 RegLogMode log_mode = LogRegWrites,
1705 Reg31Mode r31mode = Reg31IsZeroRegister)) {
1706 WriteWRegister(code, value, log_mode, r31mode);
1707 }
1708
1709 void WriteXRegister(unsigned code,
1710 int64_t value,
1711 RegLogMode log_mode = LogRegWrites,
1712 Reg31Mode r31mode = Reg31IsZeroRegister) {
1713 WriteRegister(code, value, log_mode, r31mode);
1714 }
1715 VIXL_DEPRECATED("WriteXRegister",
1716 void set_xreg(unsigned code,
1717 int64_t value,
1718 RegLogMode log_mode = LogRegWrites,
1719 Reg31Mode r31mode = Reg31IsZeroRegister)) {
1720 WriteXRegister(code, value, log_mode, r31mode);
1721 }
1722
1723 // As above, with parameterized size and type. The value is either
1724 // zero-extended or truncated to fit, as required.
1725 template <typename T>
1726 void WriteRegister(unsigned size,
1727 unsigned code,
1728 T value,
1729 RegLogMode log_mode = LogRegWrites,
1730 Reg31Mode r31mode = Reg31IsZeroRegister) {
1731 // Zero-extend the input.
1732 uint64_t raw = 0;
1733 VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw));
1734 memcpy(&raw, &value, sizeof(value));
1735
1736 // Write (and possibly truncate) the value.
1737 switch (size) {
1738 case kWRegSize:
1739 WriteRegister(code, static_cast<uint32_t>(raw), log_mode, r31mode);
1740 break;
1741 case kXRegSize:
1742 WriteRegister(code, raw, log_mode, r31mode);
1743 break;
1744 default:
1745 VIXL_UNREACHABLE();
1746 return;
1747 }
1748 }
  // Deprecated alias of the size-parameterized WriteRegister().
  template <typename T>
  VIXL_DEPRECATED("WriteRegister",
                  void set_reg(unsigned size,
                               unsigned code,
                               T value,
                               RegLogMode log_mode = LogRegWrites,
                               Reg31Mode r31mode = Reg31IsZeroRegister)) {
    WriteRegister(size, code, value, log_mode, r31mode);
  }

  // Common specialized accessors for the set_reg() template.

  // Commonly-used special cases.

  // Write `value` to the link register (lr).
  template <typename T>
  void WriteLr(T value) {
    WriteRegister(kLinkRegCode, value);
  }
  template <typename T>
  VIXL_DEPRECATED("WriteLr", void set_lr(T value)) {
    WriteLr(value);
  }

  // Write `value` to the stack pointer. Code 31 names sp here (not xzr)
  // because Reg31IsStackPointer is passed explicitly.
  template <typename T>
  void WriteSp(T value) {
    WriteRegister(31, value, LogRegWrites, Reg31IsStackPointer);
  }
  template <typename T>
  VIXL_DEPRECATED("WriteSp", void set_sp(T value)) {
    WriteSp(value);
  }

  // Vector register accessors.
  // These are equivalent to the integer register accessors, but for vector
  // registers.

  // A structure for representing a 128-bit Q register.
  struct qreg_t {
    uint8_t val[kQRegSizeInBytes];
  };

  // A structure for representing an SVE Z register, at the maximum
  // architectural size.
  struct zreg_t {
    uint8_t val[kZRegMaxSizeInBytes];
  };
1793
  // Basic accessor: read the register as the specified type.
  // T must exactly match one of the supported access sizes (B/H/S/D/Q).
  template <typename T>
  T ReadVRegister(unsigned code) const {
    VIXL_STATIC_ASSERT(
        (sizeof(T) == kBRegSizeInBytes) || (sizeof(T) == kHRegSizeInBytes) ||
        (sizeof(T) == kSRegSizeInBytes) || (sizeof(T) == kDRegSizeInBytes) ||
        (sizeof(T) == kQRegSizeInBytes));
    VIXL_ASSERT(code < kNumberOfVRegisters);

    return vregisters_[code].Get<T>();
  }
  // Deprecated alias of ReadVRegister().
  template <typename T>
  VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned code) const) {
    return ReadVRegister<T>(code);
  }

  // Common specialized accessors for the vreg() template.
  int8_t ReadBRegister(unsigned code) const {
    return ReadVRegister<int8_t>(code);
  }
  VIXL_DEPRECATED("ReadBRegister", int8_t breg(unsigned code) const) {
    return ReadBRegister(code);
  }

  // Read an H register, converting the raw bits to a half-precision value.
  vixl::internal::SimFloat16 ReadHRegister(unsigned code) const {
    return RawbitsToFloat16(ReadHRegisterBits(code));
  }
  // Note that the deprecated form returns the raw 16-bit encoding.
  VIXL_DEPRECATED("ReadHRegister", int16_t hreg(unsigned code) const) {
    return Float16ToRawbits(ReadHRegister(code));
  }

  // Read the raw 16-bit encoding of an H register.
  uint16_t ReadHRegisterBits(unsigned code) const {
    return ReadVRegister<uint16_t>(code);
  }

  float ReadSRegister(unsigned code) const {
    return ReadVRegister<float>(code);
  }
  VIXL_DEPRECATED("ReadSRegister", float sreg(unsigned code) const) {
    return ReadSRegister(code);
  }

  // Read the raw 32-bit encoding of an S register.
  uint32_t ReadSRegisterBits(unsigned code) const {
    return ReadVRegister<uint32_t>(code);
  }
  VIXL_DEPRECATED("ReadSRegisterBits",
                  uint32_t sreg_bits(unsigned code) const) {
    return ReadSRegisterBits(code);
  }

  double ReadDRegister(unsigned code) const {
    return ReadVRegister<double>(code);
  }
  VIXL_DEPRECATED("ReadDRegister", double dreg(unsigned code) const) {
    return ReadDRegister(code);
  }

  // Read the raw 64-bit encoding of a D register.
  uint64_t ReadDRegisterBits(unsigned code) const {
    return ReadVRegister<uint64_t>(code);
  }
  VIXL_DEPRECATED("ReadDRegisterBits",
                  uint64_t dreg_bits(unsigned code) const) {
    return ReadDRegisterBits(code);
  }

  // Read the full 128-bit contents of a Q register.
  qreg_t ReadQRegister(unsigned code) const {
    return ReadVRegister<qreg_t>(code);
  }
  VIXL_DEPRECATED("ReadQRegister", qreg_t qreg(unsigned code) const) {
    return ReadQRegister(code);
  }
1865
1866 // As above, with parameterized size and return type. The value is
1867 // either zero-extended or truncated to fit, as required.
1868 template <typename T>
1869 T ReadVRegister(unsigned size, unsigned code) const {
1870 uint64_t raw = 0;
1871 T result;
1872
1873 switch (size) {
1874 case kSRegSize:
1875 raw = ReadVRegister<uint32_t>(code);
1876 break;
1877 case kDRegSize:
1878 raw = ReadVRegister<uint64_t>(code);
1879 break;
1880 default:
1881 VIXL_UNREACHABLE();
1882 break;
1883 }
1884
1885 VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw));
1886 // Copy the result and truncate to fit. This assumes a little-endian host.
1887 memcpy(&result, &raw, sizeof(result));
1888 return result;
1889 }
  // Deprecated alias of the size-parameterized ReadVRegister().
  template <typename T>
  VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned size, unsigned code) const) {
    return ReadVRegister<T>(size, code);
  }

  // Mutable access to the underlying vector register storage.
  SimVRegister& ReadVRegister(unsigned code) { return vregisters_[code]; }
  VIXL_DEPRECATED("ReadVRegister", SimVRegister& vreg(unsigned code)) {
    return ReadVRegister(code);
  }
1899
  // Basic accessor: Write the specified value.
  // T must exactly match one of the supported sizes (B/H/S/D/Q/Z-max).
  template <typename T>
  void WriteVRegister(unsigned code,
                      T value,
                      RegLogMode log_mode = LogRegWrites) {
    VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) ||
                       (sizeof(value) == kHRegSizeInBytes) ||
                       (sizeof(value) == kSRegSizeInBytes) ||
                       (sizeof(value) == kDRegSizeInBytes) ||
                       (sizeof(value) == kQRegSizeInBytes) ||
                       (sizeof(value) == kZRegMaxSizeInBytes));
    VIXL_ASSERT(code < kNumberOfVRegisters);
    vregisters_[code].Write(value);

    // Trace the write unless the caller suppressed logging.
    if (log_mode == LogRegWrites) {
      LogVRegister(code, GetPrintRegisterFormat(value));
    }
  }
  // Deprecated alias of WriteVRegister().
  template <typename T>
  VIXL_DEPRECATED("WriteVRegister",
                  void set_vreg(unsigned code,
                                T value,
                                RegLogMode log_mode = LogRegWrites)) {
    WriteVRegister(code, value, log_mode);
  }
1925
  // Common specialized accessors for the WriteVRegister() template.
  void WriteBRegister(unsigned code,
                      int8_t value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteBRegister",
                  void set_breg(unsigned code,
                                int8_t value,
                                RegLogMode log_mode = LogRegWrites)) {
    return WriteBRegister(code, value, log_mode);
  }

  // Write a half-precision value to an H register as its raw 16-bit
  // encoding.
  void WriteHRegister(unsigned code,
                      vixl::internal::SimFloat16 value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, Float16ToRawbits(value), log_mode);
  }

  // Overload taking a raw 16-bit encoding directly.
  void WriteHRegister(unsigned code,
                      int16_t value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteHRegister",
                  void set_hreg(unsigned code,
                                int16_t value,
                                RegLogMode log_mode = LogRegWrites)) {
    return WriteHRegister(code, value, log_mode);
  }

  void WriteSRegister(unsigned code,
                      float value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteSRegister",
                  void set_sreg(unsigned code,
                                float value,
                                RegLogMode log_mode = LogRegWrites)) {
    WriteSRegister(code, value, log_mode);
  }

  // Write the raw 32-bit encoding of an S register.
  void WriteSRegisterBits(unsigned code,
                          uint32_t value,
                          RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteSRegisterBits",
                  void set_sreg_bits(unsigned code,
                                     uint32_t value,
                                     RegLogMode log_mode = LogRegWrites)) {
    WriteSRegisterBits(code, value, log_mode);
  }

  void WriteDRegister(unsigned code,
                      double value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteDRegister",
                  void set_dreg(unsigned code,
                                double value,
                                RegLogMode log_mode = LogRegWrites)) {
    WriteDRegister(code, value, log_mode);
  }

  // Write the raw 64-bit encoding of a D register.
  void WriteDRegisterBits(unsigned code,
                          uint64_t value,
                          RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteDRegisterBits",
                  void set_dreg_bits(unsigned code,
                                     uint64_t value,
                                     RegLogMode log_mode = LogRegWrites)) {
    WriteDRegisterBits(code, value, log_mode);
  }

  // Write a full 128-bit Q register.
  void WriteQRegister(unsigned code,
                      qreg_t value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
  VIXL_DEPRECATED("WriteQRegister",
                  void set_qreg(unsigned code,
                                qreg_t value,
                                RegLogMode log_mode = LogRegWrites)) {
    WriteQRegister(code, value, log_mode);
  }

  // Write a full (maximum-size) Z register.
  void WriteZRegister(unsigned code,
                      zreg_t value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister(code, value, log_mode);
  }
2022
  // Typed convenience overloads: access a register via a Register object
  // rather than a raw code. Register 31 is always treated as the zero
  // register by these overloads.
  template <typename T>
  T ReadRegister(Register reg) const {
    return ReadRegister<T>(reg.GetCode(), Reg31IsZeroRegister);
  }

  template <typename T>
  void WriteRegister(Register reg,
                     T value,
                     RegLogMode log_mode = LogRegWrites) {
    WriteRegister<T>(reg.GetCode(), value, log_mode, Reg31IsZeroRegister);
  }

  // As above, for vector registers.
  template <typename T>
  T ReadVRegister(VRegister vreg) const {
    return ReadVRegister<T>(vreg.GetCode());
  }

  template <typename T>
  void WriteVRegister(VRegister vreg,
                      T value,
                      RegLogMode log_mode = LogRegWrites) {
    WriteVRegister<T>(vreg.GetCode(), value, log_mode);
  }
2046
2047 template <typename T>
2048 T ReadCPURegister(CPURegister reg) const {
2049 if (reg.IsVRegister()) {
2050 return ReadVRegister<T>(VRegister(reg));
2051 } else {
2052 return ReadRegister<T>(Register(reg));
2053 }
2054 }
2055
2056 template <typename T>
2057 void WriteCPURegister(CPURegister reg,
2058 T value,
2059 RegLogMode log_mode = LogRegWrites) {
2060 if (reg.IsVRegister()) {
2061 WriteVRegister<T>(VRegister(reg), value, log_mode);
2062 } else {
2063 WriteRegister<T>(Register(reg), value, log_mode);
2064 }
2065 }
2066
2067 template <typename T, typename A>
2068 std::optional<T> MemRead(A address) const {
2069 Instruction const* pc = ReadPc();
2070 return memory_.Read<T>(address, pc);
2071 }
2072
2073 template <typename T, typename A>
2074 bool MemWrite(A address, T value) const {
2075 Instruction const* pc = ReadPc();
2076 return memory_.Write(address, value, pc);
2077 }
2078
  // Read `size_in_bytes` bytes from `address` as an unsigned value
  // (presumably zero-extended to 64 bits — see Memory::ReadUint).
  // Returns std::nullopt on failure.
  template <typename A>
  std::optional<uint64_t> MemReadUint(int size_in_bytes, A address) const {
    return memory_.ReadUint(size_in_bytes, address);
  }

  // As above, but as a signed value (see Memory::ReadInt).
  template <typename A>
  std::optional<int64_t> MemReadInt(int size_in_bytes, A address) const {
    return memory_.ReadInt(size_in_bytes, address);
  }

  // Write the low `size_in_bytes` bytes of `value` to `address`. Returns
  // false if the access fails.
  template <typename A>
  bool MemWrite(int size_in_bytes, A address, uint64_t value) const {
    return memory_.Write(size_in_bytes, address, value);
  }
2093
  // Load one lane of `dst` from memory, using the lane size implied by
  // `vform`. Returns false if the memory access fails.
  bool LoadLane(LogicVRegister dst,
                VectorFormat vform,
                int index,
                uint64_t addr) const {
    unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
    return LoadUintToLane(dst, vform, msize_in_bytes, index, addr);
  }

  // Load an unsigned `msize_in_bytes`-byte value into lane `index` of `dst`.
  // Returns false if the memory access fails.
  bool LoadUintToLane(LogicVRegister dst,
                      VectorFormat vform,
                      unsigned msize_in_bytes,
                      int index,
                      uint64_t addr) const {
    // The macro returns false from this function if the read fails.
    VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadUint(msize_in_bytes, addr));
    dst.SetUint(vform, index, value);
    return true;
  }

  // As above, but sign-extending the loaded value.
  bool LoadIntToLane(LogicVRegister dst,
                     VectorFormat vform,
                     unsigned msize_in_bytes,
                     int index,
                     uint64_t addr) const {
    VIXL_DEFINE_OR_RETURN_FALSE(value, MemReadInt(msize_in_bytes, addr));
    dst.SetInt(vform, index, value);
    return true;
  }

  // Store lane `index` of `src` to memory, using the lane size implied by
  // `vform`. Returns false if the memory access fails.
  bool StoreLane(const LogicVRegister& src,
                 VectorFormat vform,
                 int index,
                 uint64_t addr) const {
    unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform);
    return MemWrite(msize_in_bytes, addr, src.Uint(vform, index));
  }
2129
2130 uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const;
2131
  // Read a generic operand: either a CPU register or a memory location.
  // A failed memory read is a hard error (asserted), not propagated.
  template <typename T>
  T ReadGenericOperand(GenericOperand operand) const {
    if (operand.IsCPURegister()) {
      return ReadCPURegister<T>(operand.GetCPURegister());
    } else {
      VIXL_ASSERT(operand.IsMemOperand());
      auto res = MemRead<T>(ComputeMemOperandAddress(operand.GetMemOperand()));
      VIXL_ASSERT(res);
      return *res;
    }
  }

  // Write a generic operand. Returns false only if a memory write fails;
  // register writes always succeed.
  template <typename T>
  bool WriteGenericOperand(GenericOperand operand,
                           T value,
                           RegLogMode log_mode = LogRegWrites) {
    if (operand.IsCPURegister()) {
      // Outside SIMD, registers are 64-bit or a subset of a 64-bit register. If
      // the width of the value to write is smaller than 64 bits, the unused
      // bits may contain unrelated values that the code following this write
      // needs to handle gracefully.
      // Here we fill the unused bits with a predefined pattern to catch issues
      // early.
      VIXL_ASSERT(operand.GetCPURegister().GetSizeInBits() <= 64);
      uint64_t raw = 0xdeadda1adeadda1a;
      memcpy(&raw, &value, sizeof(value));
      WriteCPURegister(operand.GetCPURegister(), raw, log_mode);
    } else {
      VIXL_ASSERT(operand.IsMemOperand());
      return MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value);
    }
    return true;
  }
2165
  // NZCV condition flag accessors (with deprecated single-letter aliases).
  bool ReadN() const { return nzcv_.GetN() != 0; }
  VIXL_DEPRECATED("ReadN", bool N() const) { return ReadN(); }

  bool ReadZ() const { return nzcv_.GetZ() != 0; }
  VIXL_DEPRECATED("ReadZ", bool Z() const) { return ReadZ(); }

  bool ReadC() const { return nzcv_.GetC() != 0; }
  VIXL_DEPRECATED("ReadC", bool C() const) { return ReadC(); }

  bool ReadV() const { return nzcv_.GetV() != 0; }
  VIXL_DEPRECATED("ReadV", bool V() const) { return ReadV(); }

  // Mutable access to the whole NZCV register.
  SimSystemRegister& ReadNzcv() { return nzcv_; }
  VIXL_DEPRECATED("ReadNzcv", SimSystemRegister& nzcv()) { return ReadNzcv(); }

  // TODO: Find a way to make the fpcr_ members return the proper types, so
  // these accessors are not necessary.
  FPRounding ReadRMode() const {
    return static_cast<FPRounding>(fpcr_.GetRMode());
  }
  VIXL_DEPRECATED("ReadRMode", FPRounding RMode()) { return ReadRMode(); }

  // Read the FPCR.DN (default NaN) setting as an enum.
  UseDefaultNaN ReadDN() const {
    return fpcr_.GetDN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN;
  }
2191
2192 VIXL_DEPRECATED("ReadDN", bool DN()) {
2193 return ReadDN() == kUseDefaultNaN ? true : false;
2194 }
2195
  // Mutable access to the whole FPCR register.
  SimSystemRegister& ReadFpcr() { return fpcr_; }
  VIXL_DEPRECATED("ReadFpcr", SimSystemRegister& fpcr()) { return ReadFpcr(); }
2198
  // Specify relevant register formats for Print(V)Register and related helpers.
  // A format is a bitfield: lane size (bits 0-2), overall register size
  // (bits 3-4), plus the kPrintRegAsFP and kPrintRegPartial flags.
  enum PrintRegisterFormat {
    // The lane size.
    kPrintRegLaneSizeB = 0 << 0,
    kPrintRegLaneSizeH = 1 << 0,
    kPrintRegLaneSizeS = 2 << 0,
    kPrintRegLaneSizeW = kPrintRegLaneSizeS,
    kPrintRegLaneSizeD = 3 << 0,
    kPrintRegLaneSizeX = kPrintRegLaneSizeD,
    kPrintRegLaneSizeQ = 4 << 0,
    kPrintRegLaneSizeUnknown = 5 << 0,

    kPrintRegLaneSizeOffset = 0,
    kPrintRegLaneSizeMask = 7 << 0,

    // The overall register size.
    kPrintRegAsScalar = 0,
    kPrintRegAsDVector = 1 << 3,
    kPrintRegAsQVector = 2 << 3,
    kPrintRegAsSVEVector = 3 << 3,

    kPrintRegAsVectorMask = 3 << 3,

    // Indicate floating-point format lanes. (This flag is only supported for
    // S-, H-, and D-sized lanes.)
    kPrintRegAsFP = 1 << 5,

    // With this flag, print helpers won't check that the upper bits are zero.
    // This also forces the register name to be printed with the `reg<msb:0>`
    // format.
    //
    // The flag is supported with any PrintRegisterFormat other than those with
    // kPrintRegAsSVEVector.
    kPrintRegPartial = 1 << 6,

    // Supported combinations.
    // These exist so that they can be referred to by name, but also because C++
    // does not allow enum types to hold values that aren't explicitly
    // enumerated, and we want to be able to combine the above flags.

    // Scalar formats.
    // The macros below generate e.g. kPrintXReg, kPrintSRegFP, etc.
#define VIXL_DECL_PRINT_REG_SCALAR(size)                           \
  kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \
  kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_SCALAR_FP(size)                      \
  VIXL_DECL_PRINT_REG_SCALAR(size)                               \
  , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP,     \
    kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP
    VIXL_DECL_PRINT_REG_SCALAR(W),
    VIXL_DECL_PRINT_REG_SCALAR(X),
    VIXL_DECL_PRINT_REG_SCALAR_FP(H),
    VIXL_DECL_PRINT_REG_SCALAR_FP(S),
    VIXL_DECL_PRINT_REG_SCALAR_FP(D),
    VIXL_DECL_PRINT_REG_SCALAR(Q),
#undef VIXL_DECL_PRINT_REG_SCALAR
#undef VIXL_DECL_PRINT_REG_SCALAR_FP

    // NEON vector formats, e.g. kPrintReg4S, kPrintReg2DFP.
#define VIXL_DECL_PRINT_REG_NEON(count, type, size)                     \
  kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \
  kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size)                   \
  VIXL_DECL_PRINT_REG_NEON(count, type, size)                            \
  , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \
    kPrintReg##count##type##PartialFP =                                  \
        kPrintReg##count##type##Partial | kPrintRegAsFP
    VIXL_DECL_PRINT_REG_NEON(1, B, Scalar),
    VIXL_DECL_PRINT_REG_NEON(8, B, DVector),
    VIXL_DECL_PRINT_REG_NEON(16, B, QVector),
    VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar),
    VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector),
    VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector),
    VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar),
    VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector),
    VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector),
    VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar),
    VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector),
    VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar),
#undef VIXL_DECL_PRINT_REG_NEON
#undef VIXL_DECL_PRINT_REG_NEON_FP

    // SVE vector formats, e.g. kPrintRegVnB, kPrintRegVnDFP.
#define VIXL_DECL_PRINT_REG_SVE(type)                                 \
  kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \
  kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial
#define VIXL_DECL_PRINT_REG_SVE_FP(type)                       \
  VIXL_DECL_PRINT_REG_SVE(type)                                \
  , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \
    kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP
    VIXL_DECL_PRINT_REG_SVE(B),
    VIXL_DECL_PRINT_REG_SVE_FP(H),
    VIXL_DECL_PRINT_REG_SVE_FP(S),
    VIXL_DECL_PRINT_REG_SVE_FP(D),
    VIXL_DECL_PRINT_REG_SVE(Q)
#undef VIXL_DECL_PRINT_REG_SVE
#undef VIXL_DECL_PRINT_REG_SVE_FP
  };
2294
2295 // Return `format` with the kPrintRegPartial flag set.
2296 PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) {
2297 // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the
2298 // result of this cast will always be well-defined.
2299 return static_cast<PrintRegisterFormat>(format | kPrintRegPartial);
2300 }
2301
  // For SVE formats, return the format of a Q register part of it.
  PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) {
    VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
    // Keep the FP and lane size fields.
    int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP);
    // The resulting format must always be partial, because we're not formatting
    // the whole Z register.
    q_format |= (kPrintRegAsQVector | kPrintRegPartial);

    // This cast is always safe because NEON QVector formats support every
    // combination of FP and lane size that SVE formats do.
    return static_cast<PrintRegisterFormat>(q_format);
  }
2315
  // The lane-size field directly encodes log2(lane size in bytes).
  unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
    VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown);
    return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
  }

  unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
    return 1 << GetPrintRegLaneSizeInBytesLog2(format);
  }
2324
  // Return log2 of the overall printed register size in bytes.
  unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
    switch (format & kPrintRegAsVectorMask) {
      case kPrintRegAsScalar:
        // Scalars are exactly one lane wide.
        return GetPrintRegLaneSizeInBytesLog2(format);
      case kPrintRegAsDVector:
        return kDRegSizeInBytesLog2;
      case kPrintRegAsQVector:
        return kQRegSizeInBytesLog2;
      default:
      case kPrintRegAsSVEVector:
        // We print SVE vectors in Q-sized chunks. These need special handling,
        // and it's probably an error to call this function in that case.
        VIXL_UNREACHABLE();
        return kQRegSizeInBytesLog2;
    }
  }

  unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
    return 1 << GetPrintRegSizeInBytesLog2(format);
  }
2345
  // As above, in bits rather than bytes.
  unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) {
    return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2;
  }

  unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) {
    return 1 << GetPrintRegSizeInBitsLog2(format);
  }
2353
2354 const char* GetPartialRegSuffix(PrintRegisterFormat format) {
2355 switch (GetPrintRegSizeInBitsLog2(format)) {
2356 case kBRegSizeLog2:
2357 return "<7:0>";
2358 case kHRegSizeLog2:
2359 return "<15:0>";
2360 case kSRegSizeLog2:
2361 return "<31:0>";
2362 case kDRegSizeLog2:
2363 return "<63:0>";
2364 case kQRegSizeLog2:
2365 return "<127:0>";
2366 }
2367 VIXL_UNREACHABLE();
2368 return "<UNKNOWN>";
2369 }
2370
2371 unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
2372 unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
2373 unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
2374 VIXL_ASSERT(reg_size_log2 >= lane_size_log2);
2375 return 1 << (reg_size_log2 - lane_size_log2);
2376 }
2377
  // Return a bitmask with one set bit per printed lane (bit i set if byte i
  // starts a lane).
  uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) {
    int print_as = format & kPrintRegAsVectorMask;
    if (print_as == kPrintRegAsScalar) return 1;

    // Vector formats, including SVE formats printed in Q-sized chunks.
    // Indexed by log2(lane size in bytes): B, H, S, D, Q patterns.
    static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001};
    unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format);
    VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks));
    uint16_t mask = masks[size_in_bytes_log2];

    // Exclude lanes that aren't visible in D vectors.
    if (print_as == kPrintRegAsDVector) mask &= 0x00ff;
    return mask;
  }
2392
  // Build a format from a register size and lane size (both in bytes).
  PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size,
                                                    unsigned lane_size);

  // Scalar shorthand: lane size equals register size.
  PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) {
    return GetPrintRegisterFormatForSize(size, size);
  }
2399
2400 PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) {
2401 switch (size) {
2402 default:
2403 VIXL_UNREACHABLE();
2404 return kPrintDReg;
2405 case kDRegSizeInBytes:
2406 return kPrintDReg;
2407 case kSRegSizeInBytes:
2408 return kPrintSReg;
2409 case kHRegSizeInBytes:
2410 return kPrintHReg;
2411 }
2412 }
2413
2414 PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
2415 if ((GetPrintRegLaneSizeInBytes(format) == kHRegSizeInBytes) ||
2416 (GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) ||
2417 (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) {
2418 return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
2419 }
2420 return format;
2421 }
2422
  // As above, starting from a size in bytes.
  PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) {
    return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size));
  }

  // Generic overload: derive the format from the value's size. The float,
  // double and Float16 overloads below take precedence and select FP formats.
  template <typename T>
  PrintRegisterFormat GetPrintRegisterFormat(T value) {
    return GetPrintRegisterFormatForSize(sizeof(value));
  }

  PrintRegisterFormat GetPrintRegisterFormat(double value) {
    VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes);
    return GetPrintRegisterFormatForSizeFP(sizeof(value));
  }

  PrintRegisterFormat GetPrintRegisterFormat(float value) {
    VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes);
    return GetPrintRegisterFormatForSizeFP(sizeof(value));
  }

  PrintRegisterFormat GetPrintRegisterFormat(Float16 value) {
    // Float16's own size may differ; use the size of its raw encoding.
    VIXL_STATIC_ASSERT(sizeof(Float16ToRawbits(value)) == kHRegSizeInBytes);
    return GetPrintRegisterFormatForSizeFP(sizeof(Float16ToRawbits(value)));
  }
2446
2447 PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
2448 PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform);
2449
2450 // Print all registers of the specified types.
2451 void PrintRegisters();
2452 void PrintVRegisters();
2453 void PrintZRegisters();
2454 void PrintSystemRegisters();
2455
2456 // As above, but only print the registers that have been updated.
2457 void PrintWrittenRegisters();
2458 void PrintWrittenVRegisters();
2459 void PrintWrittenPRegisters();
2460
  // As above, but respect LOG_REG and LOG_VREG.
  void LogWrittenRegisters() {
    if (ShouldTraceRegs()) PrintWrittenRegisters();
  }
  void LogWrittenVRegisters() {
    if (ShouldTraceVRegs()) PrintWrittenVRegisters();
  }
  // P registers are traced under the same switch as V registers.
  void LogWrittenPRegisters() {
    if (ShouldTraceVRegs()) PrintWrittenPRegisters();
  }
  // Log every written register category in one call.
  void LogAllWrittenRegisters() {
    LogWrittenRegisters();
    LogWrittenVRegisters();
    LogWrittenPRegisters();
  }
2476
2477 // The amount of space to leave for a register name. This is used to keep the
2478 // values vertically aligned. The longest register name has the form
2479 // "z31<2047:1920>". The total overall value indentation must also take into
2480 // account the fixed formatting: "# {name}: 0x{value}".
2481 static const int kPrintRegisterNameFieldWidth = 14;
2482
2483 // Print whole, individual register values.
2484 // - The format can be used to restrict how much of the register is printed,
2485 // but such formats indicate that the unprinted high-order bits are zero and
2486 // these helpers will assert that.
2487 // - If the format includes the kPrintRegAsFP flag then human-friendly FP
2488 // value annotations will be printed.
2489 // - The suffix can be used to add annotations (such as memory access
2490 // details), or to suppress the newline.
2491 void PrintRegister(int code,
2492 PrintRegisterFormat format = kPrintXReg,
2493 const char* suffix = "\n");
2494 void PrintVRegister(int code,
2495 PrintRegisterFormat format = kPrintReg1Q,
2496 const char* suffix = "\n");
2497 // PrintZRegister and PrintPRegister print over several lines, so they cannot
2498 // allow the suffix to be overridden.
2499 void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
2500 void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ);
2501 void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ);
2502 // Print a single Q-sized part of a Z register, or the corresponding two-byte
2503 // part of a P register. These print single lines, and therefore allow the
2504 // suffix to be overridden. The format must include the kPrintRegPartial flag.
2505 void PrintPartialZRegister(int code,
2506 int q_index,
2507 PrintRegisterFormat format = kPrintRegVnQ,
2508 const char* suffix = "\n");
2509 void PrintPartialPRegister(int code,
2510 int q_index,
2511 PrintRegisterFormat format = kPrintRegVnQ,
2512 const char* suffix = "\n");
2513 void PrintPartialPRegister(const char* name,
2514 const SimPRegister& reg,
2515 int q_index,
2516 PrintRegisterFormat format = kPrintRegVnQ,
2517 const char* suffix = "\n");
2518
  // Like Print*Register (above), but respect trace parameters.
  void LogRegister(unsigned code, PrintRegisterFormat format) {
    if (ShouldTraceRegs()) PrintRegister(code, format);
  }
  void LogVRegister(unsigned code, PrintRegisterFormat format) {
    if (ShouldTraceVRegs()) PrintVRegister(code, format);
  }
  // Z, P and FFR tracing is gated on the V-register trace switch.
  void LogZRegister(unsigned code, PrintRegisterFormat format) {
    if (ShouldTraceVRegs()) PrintZRegister(code, format);
  }
  void LogPRegister(unsigned code, PrintRegisterFormat format) {
    if (ShouldTraceVRegs()) PrintPRegister(code, format);
  }
  void LogFFR(PrintRegisterFormat format) {
    if (ShouldTraceVRegs()) PrintFFR(format);
  }
2535
2536 // Other state updates, including system registers.
2537 void PrintSystemRegister(SystemRegister id);
2538 void PrintTakenBranch(const Instruction* target);
2539 void PrintGCS(bool is_push, uint64_t addr, size_t entry);
  // Trace-gated wrappers for the system-register and branch printers above.
  void LogSystemRegister(SystemRegister id) {
    if (ShouldTraceSysRegs()) PrintSystemRegister(id);
  }
  void LogTakenBranch(const Instruction* target) {
    if (ShouldTraceBranches()) PrintTakenBranch(target);
  }
  // GCS (Guarded Control Stack) pushes/pops are traced with system registers.
  void LogGCS(bool is_push, uint64_t addr, size_t entry) {
    if (ShouldTraceSysRegs()) PrintGCS(is_push, addr, entry);
  }
2549
2550 // Trace memory accesses.
2551
2552 // Common, contiguous register accesses (such as for scalars).
2553 // The *Write variants automatically set kPrintRegPartial on the format.
2554 void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
2555 void PrintExtendingRead(int rt_code,
2556 PrintRegisterFormat format,
2557 int access_size_in_bytes,
2558 uintptr_t address);
2559 void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
2560 void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address);
2561 void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address);
  // Simple, unpredicated SVE accesses always access the whole vector, and never
  // know the lane type, so there's no need to accept a `format`.
  void PrintZRead(int rt_code, uintptr_t address) {
    // NOTE(review): reads mark the register as logged, writes do not —
    // presumably the write path logs elsewhere; confirm against the printers.
    vregisters_[rt_code].NotifyRegisterLogged();
    PrintZAccess(rt_code, "<-", address);
  }
  void PrintZWrite(int rt_code, uintptr_t address) {
    PrintZAccess(rt_code, "->", address);
  }
  void PrintPRead(int rt_code, uintptr_t address) {
    pregisters_[rt_code].NotifyRegisterLogged();
    PrintPAccess(rt_code, "<-", address);
  }
  void PrintPWrite(int rt_code, uintptr_t address) {
    PrintPAccess(rt_code, "->", address);
  }
  // Print a raw 64-bit store that isn't associated with a register.
  void PrintWriteU64(uint64_t x, uintptr_t address) {
    fprintf(stream_,
            "# 0x%016" PRIx64 " -> %s0x%016" PRIxPTR "%s\n",
            x,
            clr_memory_address,
            address,
            clr_normal);
  }
2586
  // Like Print* (above), but respect GetTraceParameters().
  // Reads are gated on register tracing, writes on write tracing.
  void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
    if (ShouldTraceRegs()) PrintRead(rt_code, format, address);
  }
  void LogExtendingRead(int rt_code,
                        PrintRegisterFormat format,
                        int access_size_in_bytes,
                        uintptr_t address) {
    if (ShouldTraceRegs()) {
      PrintExtendingRead(rt_code, format, access_size_in_bytes, address);
    }
  }
  void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
    if (ShouldTraceWrites()) PrintWrite(rt_code, format, address);
  }
  void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
    if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address);
  }
  void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) {
    if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address);
  }
  void LogZRead(int rt_code, uintptr_t address) {
    if (ShouldTraceVRegs()) PrintZRead(rt_code, address);
  }
  void LogZWrite(int rt_code, uintptr_t address) {
    if (ShouldTraceWrites()) PrintZWrite(rt_code, address);
  }
  void LogPRead(int rt_code, uintptr_t address) {
    if (ShouldTraceVRegs()) PrintPRead(rt_code, address);
  }
  void LogPWrite(int rt_code, uintptr_t address) {
    if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
  }
  void LogWriteU64(uint64_t x, uintptr_t address) {
    if (ShouldTraceWrites()) PrintWriteU64(x, address);
  }
  // Byte-granular memory-to-memory transfers (e.g. CPY/SET-style ops).
  void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
    if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value);
  }
  // Helpers for the above, where the access operation is parameterised.
  //  - For loads, set op = "<-".
  //  - For stores, set op = "->".
  void PrintAccess(int rt_code,
                   PrintRegisterFormat format,
                   const char* op,
                   uintptr_t address);
  void PrintVAccess(int rt_code,
                    PrintRegisterFormat format,
                    const char* op,
                    uintptr_t address);
  // Trace a single-byte memory-to-memory transfer from `src` to `dst`.
  void PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value);
  // Simple, unpredicated SVE accesses always access the whole vector, and never
  // know the lane type, so these don't accept a `format`.
  void PrintZAccess(int rt_code, const char* op, uintptr_t address);
  void PrintPAccess(int rt_code, const char* op, uintptr_t address);

  // Multiple-structure accesses.
  void PrintVStructAccess(int rt_code,
                          int reg_count,
                          PrintRegisterFormat format,
                          const char* op,
                          uintptr_t address);
  // Single-structure (single-lane) accesses.
  void PrintVSingleStructAccess(int rt_code,
                                int reg_count,
                                int lane,
                                PrintRegisterFormat format,
                                const char* op,
                                uintptr_t address);
  // Replicating accesses.
  void PrintVReplicatingStructAccess(int rt_code,
                                     int reg_count,
                                     PrintRegisterFormat format,
                                     const char* op,
                                     uintptr_t address);

  // Multiple-structure accesses.
  void PrintZStructAccess(int rt_code,
                          int reg_count,
                          const LogicPRegister& pg,
                          PrintRegisterFormat format,
                          int msize_in_bytes,
                          const char* op,
                          const LogicSVEAddressVector& addr);

  // Register-printing helper for all structured accessors.
  //
  // All lanes (according to `format`) are printed, but lanes indicated by
  // `focus_mask` are of particular interest. Each bit corresponds to a byte in
  // the printed register, in a manner similar to SVE's predicates. Currently,
  // this is used to determine when to print human-readable FP annotations.
  void PrintVRegistersForStructuredAccess(int rt_code,
                                          int reg_count,
                                          uint16_t focus_mask,
                                          PrintRegisterFormat format);

  // As for the VRegister variant, but print partial Z register names.
  void PrintZRegistersForStructuredAccess(int rt_code,
                                          int q_index,
                                          int reg_count,
                                          uint16_t focus_mask,
                                          PrintRegisterFormat format);

  // Print part of a memory access. This should be used for annotating
  // non-trivial accesses, such as structured or sign-extending loads. Call
  // Print*Register (or Print*RegistersForStructuredAccess), then
  // PrintPartialAccess for each contiguous access that makes up the
  // instruction.
  //
  //  access_mask:
  //      The lanes to be printed. Each bit corresponds to a byte in the printed
  //      register, in a manner similar to SVE's predicates, except that the
  //      lane size is not respected when interpreting `access_mask`: unaligned
  //      bits must be zeroed.
  //
  //      This function asserts that this mask is non-zero.
  //
  //  future_access_mask:
  //      The lanes to be printed by a future invocation. This must be specified
  //      because vertical lines are drawn for partial accesses that haven't yet
  //      been printed. The format is the same as for `access_mask`.
  //
  //      If a lane is active in both `access_mask` and `future_access_mask`,
  //      `access_mask` takes precedence.
  //
  //  struct_element_count:
  //      The number of elements in each structure. For non-structured accesses,
  //      set this to one. Along with lane_size_in_bytes, this is used to
  //      determine the size of each access, and to format the accessed value.
  //
  //  op:
  //      For stores, use "->". For loads, use "<-".
  //
  //  address:
  //      The address of this partial access. (Not the base address of the whole
  //      instruction.) The traced value is read from this address (according to
  //      `struct_element_count` and `lane_size_in_bytes`) so it must be
  //      accessible, and when tracing stores, the store must have been executed
  //      before this function is called.
  //
  //  reg_size_in_bytes:
  //      The size of the register being accessed. This helper is usually used
  //      for V registers or Q-sized chunks of Z registers, so that is the
  //      default, but it is possible to use this to annotate X register
  //      accesses by specifying kXRegSizeInBytes.
  //
  // The return value is a future_access_mask suitable for the next iteration,
  // so that it is possible to execute this in a loop, until the mask is zero.
  // Note that `access_mask` must still be updated by the caller for each call.
  uint16_t PrintPartialAccess(uint16_t access_mask,
                              uint16_t future_access_mask,
                              int struct_element_count,
                              int lane_size_in_bytes,
                              const char* op,
                              uintptr_t address,
                              int reg_size_in_bytes = kQRegSizeInBytes);
2743
  // Print an abstract register value. This works for all register types, and
  // can print parts of registers. This exists to ensure consistent formatting
  // of values.
  void PrintRegisterValue(const uint8_t* value,
                          int value_size,
                          PrintRegisterFormat format);
  // Convenience overload: print a simulated register's bytes, clamped to
  // Q-register size.
  template <typename T>
  void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) {
    PrintRegisterValue(sim_register.GetBytes(),
                       std::min(sim_register.GetSizeInBytes(),
                                kQRegSizeInBytes),
                       format);
  }

  // As above, but format as an SVE predicate value, using binary notation with
  // spaces between each bit so that they align with the Z register bytes that
  // they predicate.
  void PrintPRegisterValue(uint16_t value);

  // Print human-readable FP annotations for the lanes selected by `lane_mask`.
  void PrintRegisterValueFPAnnotations(const uint8_t* value,
                                       uint16_t lane_mask,
                                       PrintRegisterFormat format);
  template <typename T>
  void PrintRegisterValueFPAnnotations(const T& sim_register,
                                       uint16_t lane_mask,
                                       PrintRegisterFormat format) {
    PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format);
  }
  // As above, with the lane mask derived from `format`.
  template <typename T>
  void PrintRegisterValueFPAnnotations(const T& sim_register,
                                       PrintRegisterFormat format) {
    PrintRegisterValueFPAnnotations(sim_register.GetBytes(),
                                    GetPrintRegLaneMask(format),
                                    format);
  }

  // Handlers for the simulator's pseudo-instructions.
  VIXL_NO_RETURN void DoUnreachable(const Instruction* instr);
  void DoTrace(const Instruction* instr);
  void DoLog(const Instruction* instr);

  // Static register-name lookups used by trace output.
  static const char* WRegNameForCode(unsigned code,
                                     Reg31Mode mode = Reg31IsZeroRegister);
  static const char* XRegNameForCode(unsigned code,
                                     Reg31Mode mode = Reg31IsZeroRegister);
  static const char* BRegNameForCode(unsigned code);
  static const char* HRegNameForCode(unsigned code);
  static const char* SRegNameForCode(unsigned code);
  static const char* DRegNameForCode(unsigned code);
  static const char* VRegNameForCode(unsigned code);
  static const char* ZRegNameForCode(unsigned code);
  static const char* PRegNameForCode(unsigned code);
2795
  // Whether trace output is decorated with colour codes.
  bool IsColouredTrace() const { return coloured_trace_; }
  VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) {
    return IsColouredTrace();
  }

  void SetColouredTrace(bool value);
  VIXL_DEPRECATED("SetColouredTrace", void set_coloured_trace(bool value)) {
    SetColouredTrace(value);
  }

  // Values for trace parameters defined in simulator-constants-aarch64.h in
  // enum TraceParameters.
  int GetTraceParameters() const { return trace_parameters_; }
  VIXL_DEPRECATED("GetTraceParameters", int trace_parameters() const) {
    return GetTraceParameters();
  }

  // Convenience queries for individual TraceParameters bits.
  bool ShouldTraceWrites() const {
    return (GetTraceParameters() & LOG_WRITE) != 0;
  }
  bool ShouldTraceRegs() const {
    return (GetTraceParameters() & LOG_REGS) != 0;
  }
  bool ShouldTraceVRegs() const {
    return (GetTraceParameters() & LOG_VREGS) != 0;
  }
  bool ShouldTraceSysRegs() const {
    return (GetTraceParameters() & LOG_SYSREGS) != 0;
  }
  bool ShouldTraceBranches() const {
    return (GetTraceParameters() & LOG_BRANCH) != 0;
  }

  void SetTraceParameters(int parameters);
  VIXL_DEPRECATED("SetTraceParameters",
                  void set_trace_parameters(int parameters)) {
    SetTraceParameters(parameters);
  }
2834
  // Clear the simulated local monitor to force the next store-exclusive
  // instruction to fail.
  void ClearLocalMonitor() { local_monitor_.Clear(); }

  // Suppress the simulator's exclusive-access warning for the rest of this
  // run.
  void SilenceExclusiveAccessWarning() {
    print_exclusive_access_warning_ = false;
  }
2842
2843 void CheckIsValidUnalignedAtomicAccess(int rn,
2844 uint64_t address,
2845 unsigned access_size) {
2846 // Verify that the address is available to the host.
2847 VIXL_ASSERT(address == static_cast<uintptr_t>(address));
2848
2849 if (GetCPUFeatures()->Has(CPUFeatures::kUSCAT)) {
2850 // Check that the access falls entirely within one atomic access granule.
2851 if (AlignDown(address, kAtomicAccessGranule) !=
2852 AlignDown(address + access_size - 1, kAtomicAccessGranule)) {
2853 VIXL_ALIGNMENT_EXCEPTION();
2854 }
2855 } else {
2856 // Check that the access is aligned.
2857 if (AlignDown(address, access_size) != address) {
2858 VIXL_ALIGNMENT_EXCEPTION();
2859 }
2860 }
2861
2862 // The sp must be aligned to 16 bytes when it is accessed.
2863 if ((rn == kSpRegCode) && (AlignDown(address, 16) != address)) {
2864 VIXL_ALIGNMENT_EXCEPTION();
2865 }
2866 }
2867
  // Pointer authentication distinguishes data and instruction pointers.
  enum PointerType { kDataPointer, kInstructionPointer };

  // A PAC key: a 128-bit value (split into `high` and `low` halves) plus a
  // key number used by the PAC computation.
  struct PACKey {
    uint64_t high;
    uint64_t low;
    int number;
  };

  // Current implementation is that all pointers are tagged.
  bool HasTBI(uint64_t ptr, PointerType type) {
    USE(ptr, type);
    return true;
  }

  // Current implementation uses 48-bit virtual addresses.
  int GetBottomPACBit(uint64_t ptr, int ttbr) {
    USE(ptr, ttbr);
    VIXL_ASSERT((ttbr == 0) || (ttbr == 1));
    return 48;
  }

  // The top PAC bit is 55 for the purposes of relative bit fields with TBI,
  // however bit 55 is the TTBR bit regardless of TBI so isn't part of the PAC
  // codes in pointers.
  int GetTopPACBit(uint64_t ptr, PointerType type) {
    return HasTBI(ptr, type) ? 55 : 63;
  }

  // Armv8.3 Pointer authentication helpers.
  uint64_t CalculatePACMask(uint64_t ptr, PointerType type, int ext_bit);
  uint64_t ComputePAC(uint64_t data, uint64_t context, PACKey key);
  uint64_t AuthPAC(uint64_t ptr,
                   uint64_t context,
                   PACKey key,
                   PointerType type);
  uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type);
  uint64_t StripPAC(uint64_t ptr, PointerType type);
  // Shared implementation for PAC instructions: presumably applies `pac_fn`
  // to register `src` and writes the result to register `dst` — confirm
  // against the definition.
  void PACHelper(int dst,
                 int src,
                 PACKey key,
                 decltype(&Simulator::AddPAC) pac_fn);
2909
2910 // Armv8.5 MTE helpers.
2911 uint64_t ChooseNonExcludedTag(uint64_t tag,
2912 uint64_t offset,
2913 uint64_t exclude = 0) {
2914 VIXL_ASSERT(IsUint4(tag) && IsUint4(offset) && IsUint16(exclude));
2915
2916 if (exclude == 0xffff) {
2917 return 0;
2918 }
2919
2920 if (offset == 0) {
2921 while ((exclude & (uint64_t{1} << tag)) != 0) {
2922 tag = (tag + 1) % 16;
2923 }
2924 }
2925
2926 while (offset > 0) {
2927 offset--;
2928 tag = (tag + 1) % 16;
2929 while ((exclude & (uint64_t{1} << tag)) != 0) {
2930 tag = (tag + 1) % 16;
2931 }
2932 }
2933 return tag;
2934 }
2935
2936 uint64_t GetAddressWithAllocationTag(uint64_t addr, uint64_t tag) {
2937 VIXL_ASSERT(IsUint4(tag));
2938 return (addr & ~(UINT64_C(0xf) << 56)) | (tag << 56);
2939 }
2940
#if __linux__
#define VIXL_HAS_SIMULATED_MMAP
  // Create or remove a mapping with memory protection. Memory attributes such
  // as MTE and BTI are represented by metadata in Simulator.
  void* Mmap(
      void* address, size_t length, int prot, int flags, int fd, off_t offset);

  // NOTE(review): unlike POSIX munmap, this takes `prot` — presumably so the
  // simulator can update its protection metadata; confirm against the
  // implementation.
  int Munmap(void* address, size_t length, int prot);
#endif

  // The common CPUFeatures interface with the set of available features.

  CPUFeatures* GetCPUFeatures() {
    return cpu_features_auditor_.GetCPUFeatures();
  }

  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_auditor_.SetCPUFeatures(cpu_features);
  }

  // The set of features that the simulator has encountered.
  const CPUFeatures& GetSeenFeatures() {
    return cpu_features_auditor_.GetSeenFeatures();
  }
  void ResetSeenFeatures() { cpu_features_auditor_.ResetSeenFeatures(); }
2966
2967 // Runtime call emulation support.
2968 // It requires VIXL's ABI features, and C++11 or greater.
2969 // Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect
2970 // in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253
2971 #if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
2972 (defined(_MSC_VER) || defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
2973
2974 #define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
2975
2976 // The implementation of the runtime call helpers require the functionality
2977 // provided by `std::index_sequence`. It is only available from C++14, but
2978 // we want runtime call simulation to work from C++11, so we emulate if
2979 // necessary.
2980 #if __cplusplus >= 201402L
2981 template <std::size_t... I>
2982 using local_index_sequence = std::index_sequence<I...>;
2983 template <typename... P>
2984 using __local_index_sequence_for = std::index_sequence_for<P...>;
2985 #else
2986 // Emulate the behaviour of `std::index_sequence` and
2987 // `std::index_sequence_for`.
2988 // Naming follow the `std` names, prefixed with `emulated_`.
2989 template <size_t... I>
2990 struct emulated_index_sequence {};
2991
2992 // A recursive template to create a sequence of indexes.
2993 // The base case (for `N == 0`) is declared outside of the class scope, as
2994 // required by C++.
2995 template <std::size_t N, size_t... I>
2996 struct emulated_make_index_sequence_helper
2997 : emulated_make_index_sequence_helper<N - 1, N - 1, I...> {};
2998
2999 template <std::size_t N>
3000 struct emulated_make_index_sequence : emulated_make_index_sequence_helper<N> {
3001 };
3002
3003 template <typename... P>
3004 struct emulated_index_sequence_for
3005 : emulated_make_index_sequence<sizeof...(P)> {};
3006
3007 template <std::size_t... I>
3008 using local_index_sequence = emulated_index_sequence<I...>;
3009 template <typename... P>
3010 using __local_index_sequence_for = emulated_index_sequence_for<P...>;
3011 #endif
3012
3013 // Expand the argument tuple and perform the call.
3014 template <typename R, typename... P, std::size_t... I>
3015 R DoRuntimeCall(R (*function)(P...),
3016 std::tuple<P...> arguments,
3017 local_index_sequence<I...>) {
3018 USE(arguments);
3019 return function(std::get<I>(arguments)...);
3020 }
3021
3022 template <typename R, typename... P>
3023 void RuntimeCallNonVoid(R (*function)(P...)) {
3024 ABI abi;
3025 std::tuple<P...> argument_operands{
3026 ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
3027 R return_value = DoRuntimeCall(function,
3028 argument_operands,
3029 __local_index_sequence_for<P...>{});
3030 bool succeeded =
3031 WriteGenericOperand(abi.GetReturnGenericOperand<R>(), return_value);
3032 USE(succeeded);
3033 VIXL_ASSERT(succeeded);
3034 }
3035
3036 template <typename R, typename... P>
3037 void RuntimeCallVoid(R (*function)(P...)) {
3038 ABI abi;
3039 std::tuple<P...> argument_operands{
3040 ReadGenericOperand<P>(abi.GetNextParameterGenericOperand<P>())...};
3041 DoRuntimeCall(function,
3042 argument_operands,
3043 __local_index_sequence_for<P...>{});
3044 }
3045
3046 // We use `struct` for `void` return type specialisation.
3047 template <typename R, typename... P>
3048 struct RuntimeCallStructHelper {
3049 static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
3050 R (*function)(P...) = reinterpret_cast<R (*)(P...)>(function_pointer);
3051 simulator->RuntimeCallNonVoid(function);
3052 }
3053 };
3054
3055 // Partial specialization when the return type is `void`.
3056 template <typename... P>
3057 struct RuntimeCallStructHelper<void, P...> {
3058 static void Wrapper(Simulator* simulator, uintptr_t function_pointer) {
3059 void (*function)(P...) =
3060 reinterpret_cast<void (*)(P...)>(function_pointer);
3061 simulator->RuntimeCallVoid(function);
3062 }
3063 };
3064 #endif
3065
  // Configure the simulated value of 'VL', which is the size of a Z register.
  // Because this cannot occur during a program's lifetime, this function also
  // resets the SVE registers.
  void SetVectorLengthInBits(unsigned vector_length);

  unsigned GetVectorLengthInBits() const { return vector_length_; }
  unsigned GetVectorLengthInBytes() const {
    VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0);
    return vector_length_ / kBitsPerByte;
  }
  // A predicate register provides one bit per kZRegBitsPerPRegBit vector bits,
  // so its length scales with the configured VL.
  unsigned GetPredicateLengthInBits() const {
    VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0);
    return GetVectorLengthInBits() / kZRegBitsPerPRegBit;
  }
  unsigned GetPredicateLengthInBytes() const {
    VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0);
    return GetVectorLengthInBytes() / kZRegBitsPerPRegBit;
  }
3084
3085 unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const {
3086 if (IsSVEFormat(vform)) {
3087 return GetVectorLengthInBits();
3088 } else {
3089 return vixl::aarch64::RegisterSizeInBitsFromFormat(vform);
3090 }
3091 }
3092
3093 unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const {
3094 unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform);
3095 VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0);
3096 return size_in_bits / kBitsPerByte;
3097 }
3098
3099 int LaneCountFromFormat(VectorFormat vform) const {
3100 if (IsSVEFormat(vform)) {
3101 return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform);
3102 } else {
3103 return vixl::aarch64::LaneCountFromFormat(vform);
3104 }
3105 }
3106
3107 bool IsFirstActive(VectorFormat vform,
3108 const LogicPRegister& mask,
3109 const LogicPRegister& bits) {
3110 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3111 if (mask.IsActive(vform, i)) {
3112 return bits.IsActive(vform, i);
3113 }
3114 }
3115 return false;
3116 }
3117
3118 bool AreNoneActive(VectorFormat vform,
3119 const LogicPRegister& mask,
3120 const LogicPRegister& bits) {
3121 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3122 if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) {
3123 return false;
3124 }
3125 }
3126 return true;
3127 }
3128
3129 bool IsLastActive(VectorFormat vform,
3130 const LogicPRegister& mask,
3131 const LogicPRegister& bits) {
3132 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3133 if (mask.IsActive(vform, i)) {
3134 return bits.IsActive(vform, i);
3135 }
3136 }
3137 return false;
3138 }
3139
  // Set NZCV according to the SVE predicate-test rules:
  //   N: `bits` is active at the first lane governed by `mask`.
  //   Z: `bits` is active at no governed lane.
  //   C: `bits` is inactive at the last governed lane.
  //   V: always cleared.
  void PredTest(VectorFormat vform,
                const LogicPRegister& mask,
                const LogicPRegister& bits) {
    ReadNzcv().SetN(IsFirstActive(vform, mask, bits));
    ReadNzcv().SetZ(AreNoneActive(vform, mask, bits));
    ReadNzcv().SetC(!IsLastActive(vform, mask, bits));
    ReadNzcv().SetV(0);
    LogSystemRegister(NZCV);
  }

  // The simulator's dedicated all-true predicate register.
  SimPRegister& GetPTrue() { return pregister_all_true_; }
3151
3152 template <typename T>
3153 size_t CleanGranuleTag(T address, size_t length = kMTETagGranuleInBytes) {
3154 size_t count = 0;
3155 for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
3156 count +=
3157 meta_data_.CleanMTETag(reinterpret_cast<uintptr_t>(address) + offset);
3158 }
3159 size_t expected =
3160 length / kMTETagGranuleInBytes + (length % kMTETagGranuleInBytes != 0);
3161
3162 // Give a warning when the memory region that is being unmapped isn't all
3163 // either MTE protected or not.
3164 if (count != expected) {
3165 std::stringstream sstream;
3166 sstream << std::hex
3167 << "MTE WARNING : the memory region being unmapped "
3168 "starting at address 0x"
3169 << reinterpret_cast<uint64_t>(address)
3170 << "is not fully MTE protected.\n";
3171 VIXL_WARNING(sstream.str().c_str());
3172 }
3173 return count;
3174 }
3175
  // Record MTE tag `tag` for every tag granule in [address, address + length).
  template <typename T>
  void SetGranuleTag(T address,
                     int tag,
                     size_t length = kMTETagGranuleInBytes) {
    for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) {
      // NOTE(review): the C-style cast (rather than reinterpret_cast, as used
      // in CleanGranuleTag) presumably allows T to be either a pointer or an
      // integral address type — confirm before "tidying" it.
      meta_data_.SetMTETag((uintptr_t)(address) + offset, tag);
    }
  }
3184
  // Read the MTE tag recorded for `address`.
  template <typename T>
  int GetGranuleTag(T address) {
    // NOTE(review): no address cast here, unlike Set/CleanGranuleTag —
    // presumably GetMTETag accepts the address type directly; confirm.
    return meta_data_.GetMTETag(address);
  }

  // Generate a random address tag, and any tags specified in the input are
  // excluded from the selection.
  uint64_t GenerateRandomTag(uint16_t exclude = 0);

  // Register a new BranchInterception object. If 'function' is branched to
  // (e.g: "bl function") in the future; instead, if provided, 'callback' will
  // be called otherwise a runtime call will be performed on 'function'.
  //
  // For example: this can be used to always perform runtime calls on
  // non-AArch64 functions without using the macroassembler.
  template <typename R, typename... P>
  void RegisterBranchInterception(R (*function)(P...),
                                  InterceptionCallback callback = nullptr) {
    meta_data_.RegisterBranchInterception(*function, callback);
  }

  // Return the current output stream in use by the simulator.
  FILE* GetOutputStream() const { return stream_; }

  // Interactive debugger controls.
  bool IsDebuggerEnabled() const { return debugger_enabled_; }

  void SetDebuggerEnabled(bool enabled) { debugger_enabled_ = enabled; }

  Debugger* GetDebugger() const { return debugger_.get(); }
3214
#ifdef VIXL_ENABLE_IMPLICIT_CHECKS
  // Returns true if the faulting instruction address (usually the program
  // counter or instruction pointer) comes from an internal VIXL memory access.
  // This can be used by signal handlers to check if a signal was raised from
  // the simulator (via TryMemoryAccess) before the actual
  // access occurs.
  bool IsSimulatedMemoryAccess(uintptr_t fault_pc) const {
    return (fault_pc ==
            reinterpret_cast<uintptr_t>(&_vixl_internal_ReadMemory));
  }

  // Get the instruction address of the internal VIXL memory access continuation
  // label. Signal handlers can resume execution at this address to return to
  // TryMemoryAccess which will continue simulation.
  uintptr_t GetSignalReturnAddress() const {
    return reinterpret_cast<uintptr_t>(&_vixl_internal_AccessMemory_continue);
  }

  // Replace the fault address reported by the kernel with the actual faulting
  // address.
  //
  // This is required because TryMemoryAccess reads a section of
  // memory 1 byte at a time meaning the fault address reported may not be the
  // base address of memory being accessed.
  void ReplaceFaultAddress(siginfo_t* siginfo, void* context) {
#ifdef __x86_64__
    // The base address being accessed is passed in as the first argument to
    // _vixl_internal_ReadMemory, i.e. in RDI under the x86-64 SysV ABI.
    ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
    siginfo->si_addr = reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RDI]);
#else
    USE(siginfo);
    USE(context);
#endif  // __x86_64__
  }
#endif  // VIXL_ENABLE_IMPLICIT_CHECKS
3251
 protected:
  // Colour-code strings used to decorate trace output; presumably configured
  // by SetColouredTrace() (see its definition) — empty/plain when colour is
  // disabled.
  const char* clr_normal;
  const char* clr_flag_name;
  const char* clr_flag_value;
  const char* clr_reg_name;
  const char* clr_reg_value;
  const char* clr_vreg_name;
  const char* clr_vreg_value;
  const char* clr_preg_name;
  const char* clr_preg_value;
  const char* clr_memory_address;
  const char* clr_warning;
  const char* clr_warning_message;
  const char* clr_printf;
  const char* clr_branch_marker;
3267
  // Simulation helpers ------------------------------------

  // Reset the respective register files (and FFR) to their initial state.
  void ResetSystemRegisters();
  void ResetRegisters();
  void ResetVRegisters();
  void ResetPRegisters();
  void ResetFFR();
3275
  // Evaluate an AArch64 condition code against the current NZCV flags.
  bool ConditionPassed(Condition cond) {
    switch (cond) {
      case eq:  // Equal: Z set.
        return ReadZ();
      case ne:  // Not equal: Z clear.
        return !ReadZ();
      case hs:  // Unsigned higher or same: C set.
        return ReadC();
      case lo:  // Unsigned lower: C clear.
        return !ReadC();
      case mi:  // Negative: N set.
        return ReadN();
      case pl:  // Positive or zero: N clear.
        return !ReadN();
      case vs:  // Overflow: V set.
        return ReadV();
      case vc:  // No overflow: V clear.
        return !ReadV();
      case hi:  // Unsigned higher: C set and Z clear.
        return ReadC() && !ReadZ();
      case ls:  // Unsigned lower or same: C clear or Z set.
        return !(ReadC() && !ReadZ());
      case ge:  // Signed greater than or equal: N == V.
        return ReadN() == ReadV();
      case lt:  // Signed less than: N != V.
        return ReadN() != ReadV();
      case gt:  // Signed greater than: Z clear and N == V.
        return !ReadZ() && (ReadN() == ReadV());
      case le:  // Signed less than or equal: Z set or N != V.
        return !(!ReadZ() && (ReadN() == ReadV()));
      case nv:  // Behaves as always-true, like al.
        VIXL_FALLTHROUGH();
      case al:
        return true;
      default:
        VIXL_UNREACHABLE();
        return false;
    }
  }
3315
  // Overload accepting the raw instruction field.
  bool ConditionPassed(Instr cond) {
    return ConditionPassed(static_cast<Condition>(cond));
  }

  bool ConditionFailed(Condition cond) { return !ConditionPassed(cond); }

  // Instruction-class helpers shared by several visitors.
  void AddSubHelper(const Instruction* instr, int64_t op2);
  uint64_t AddWithCarry(unsigned reg_size,
                        bool set_flags,
                        uint64_t left,
                        uint64_t right,
                        int carry_in = 0);
  // As above, but returns the result with the flags packed in the uint8_t —
  // presumably NZCV; confirm against the definition.
  std::pair<uint64_t, uint8_t> AddWithCarry(unsigned reg_size,
                                            uint64_t left,
                                            uint64_t right,
                                            int carry_in);
  // 128-bit arithmetic helpers.
  vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y);
  vixl_uint128_t Lsl128(vixl_uint128_t x, unsigned shift) const;
  vixl_uint128_t Eor128(vixl_uint128_t x, vixl_uint128_t y) const;
  vixl_uint128_t Mul64(uint64_t x, uint64_t y);
  vixl_uint128_t Neg128(vixl_uint128_t x);
  void LogicalHelper(const Instruction* instr, int64_t op2);
  void ConditionalCompareHelper(const Instruction* instr, int64_t op2);
  // Scalar load/store helpers, parameterised on addressing mode and, where
  // templated, on the access type.
  void LoadStoreHelper(const Instruction* instr,
                       int64_t offset,
                       AddrMode addrmode);
  void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode);
  template <typename T>
  void CompareAndSwapHelper(const Instruction* instr);
  template <typename T>
  void CompareAndSwapPairHelper(const Instruction* instr);
  template <typename T>
  void AtomicMemorySimpleHelper(const Instruction* instr);
  template <typename T>
  void AtomicMemorySwapHelper(const Instruction* instr);
  template <typename T>
  void LoadAcquireRCpcHelper(const Instruction* instr);
  template <typename T1, typename T2>
  void LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr);
  template <typename T>
  void StoreReleaseUnscaledOffsetHelper(const Instruction* instr);
  uintptr_t AddressModeHelper(unsigned addr_reg,
                              int64_t offset,
                              AddrMode addrmode);
  void NEONLoadStoreMultiStructHelper(const Instruction* instr,
                                      AddrMode addr_mode);
  void NEONLoadStoreSingleStructHelper(const Instruction* instr,
                                       AddrMode addr_mode);
3363 AddrMode addr_mode);
  // Shared prologue ("P") handling for the MOPS memory copy/set instruction
  // triplets (`mops_type` is a hashed literal such as "cpy"_h or "setg"_h):
  // validates register constraints, saturates the size register, and
  // initialises NZCV.
  template <uint32_t mops_type>
  void MOPSPHelper(const Instruction* instr) {
    VIXL_ASSERT(instr->IsConsistentMOPSTriplet<mops_type>());

    int d = instr->GetRd();
    int n = instr->GetRn();
    int s = instr->GetRs();

    // Aliased registers and xzr are disallowed for Xd and Xn.
    if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) {
      VisitUnallocated(instr);
    }

    // Additionally, Xs may not be xzr for cpy.
    if ((mops_type == "cpy"_h) && (s == 31)) {
      VisitUnallocated(instr);
    }

    // Bits 31 and 30 must be zero.
    if (instr->ExtractBits(31, 30) != 0) {
      VisitUnallocated(instr);
    }

    // Saturate copy count: cpy uses a 55-bit limit, set/setg a 63-bit limit.
    uint64_t xn = ReadXRegister(n);
    int saturation_bits = (mops_type == "cpy"_h) ? 55 : 63;
    if ((xn >> saturation_bits) != 0) {
      xn = (UINT64_C(1) << saturation_bits) - 1;
      if (mops_type == "setg"_h) {
        // Align saturated value to granule.
        xn &= ~UINT64_C(kMTETagGranuleInBytes - 1);
      }
      WriteXRegister(n, xn);
    }

    ReadNzcv().SetN(0);
    ReadNzcv().SetZ(0);
    ReadNzcv().SetC(1);  // Indicates "option B" implementation.
    ReadNzcv().SetV(0);
  }
3404
  // Compute a shifted register operand value.
  int64_t ShiftOperand(unsigned reg_size,
                       uint64_t value,
                       Shift shift_type,
                       unsigned amount) const;
  // Compute an extended (and optionally left-shifted) register operand value.
  int64_t ExtendValue(unsigned reg_width,
                      int64_t value,
                      Extend extend_type,
                      unsigned left_shift = 0) const;
  // Carry-less (polynomial) multiplication helpers.
  uint64_t PolynomialMult(uint64_t op1,
                          uint64_t op2,
                          int lane_size_in_bits) const;
  vixl_uint128_t PolynomialMult128(uint64_t op1,
                                   uint64_t op2,
                                   int lane_size_in_bits) const;

  // Structured load helpers (single-lane, multi-register and replicating
  // forms). NOTE(review): each returns a bool — presumably false when the
  // memory access fails; confirm against the definitions.
  bool ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
  bool ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
  bool ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
  bool ld1r(VectorFormat vform,
            VectorFormat unpack_vform,
            LogicVRegister dst,
            uint64_t addr,
            bool is_signed = false);
  bool ld2(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           uint64_t addr);
  bool ld2(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           int index,
           uint64_t addr);
  bool ld2r(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            uint64_t addr);
  bool ld3(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           LogicVRegister dst3,
           uint64_t addr);
  bool ld3(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           LogicVRegister dst3,
           int index,
           uint64_t addr);
  bool ld3r(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            uint64_t addr);
  bool ld4(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           LogicVRegister dst3,
           LogicVRegister dst4,
           uint64_t addr);
  bool ld4(VectorFormat vform,
           LogicVRegister dst1,
           LogicVRegister dst2,
           LogicVRegister dst3,
           LogicVRegister dst4,
           int index,
           uint64_t addr);
  bool ld4r(VectorFormat vform,
            LogicVRegister dst1,
            LogicVRegister dst2,
            LogicVRegister dst3,
            LogicVRegister dst4,
            uint64_t addr);
  // Structured store helpers, mirroring the loads above.
  bool st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
  bool st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
  bool st2(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           uint64_t addr);
  bool st2(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           int index,
           uint64_t addr);
  bool st3(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           LogicVRegister src3,
           uint64_t addr);
  bool st3(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           LogicVRegister src3,
           int index,
           uint64_t addr);
  bool st4(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           LogicVRegister src3,
           LogicVRegister src4,
           uint64_t addr);
  bool st4(VectorFormat vform,
           LogicVRegister src,
           LogicVRegister src2,
           LogicVRegister src3,
           LogicVRegister src4,
           int index,
           uint64_t addr);
  // Vector logic helpers, operating lane-wise on LogicVRegister views.
  LogicVRegister cmp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2,
                     Condition cond);
  LogicVRegister cmp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     int imm,
                     Condition cond);
  LogicVRegister cmptst(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2);
  LogicVRegister add(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
  // Add `value` to each lane of `src1`, treating `value` as unsigned for the
  // purposes of setting the saturation flags.
  LogicVRegister add_uint(VectorFormat vform,
                          LogicVRegister dst,
                          const LogicVRegister& src1,
                          uint64_t value);
  LogicVRegister addp(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
  // SVE break (BRK*) predicate helpers.
  LogicPRegister brka(LogicPRegister pd,
                      const LogicPRegister& pg,
                      const LogicPRegister& pn);
  LogicPRegister brkb(LogicPRegister pd,
                      const LogicPRegister& pg,
                      const LogicPRegister& pn);
  LogicPRegister brkn(LogicPRegister pdm,
                      const LogicPRegister& pg,
                      const LogicPRegister& pn);
  LogicPRegister brkpa(LogicPRegister pd,
                       const LogicPRegister& pg,
                       const LogicPRegister& pn,
                       const LogicPRegister& pm);
  LogicPRegister brkpb(LogicPRegister pd,
                       const LogicPRegister& pg,
                       const LogicPRegister& pn,
                       const LogicPRegister& pm);
  // dst = srca + src1 * src2
  LogicVRegister mla(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& srca,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
  // dst = srca - src1 * src2
  LogicVRegister mls(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& srca,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
3568 LogicVRegister mul(VectorFormat vform,
3569 LogicVRegister dst,
3570 const LogicVRegister& src1,
3571 const LogicVRegister& src2);
3572 LogicVRegister mul(VectorFormat vform,
3573 LogicVRegister dst,
3574 const LogicVRegister& src1,
3575 const LogicVRegister& src2,
3576 int index);
3577 LogicVRegister mla(VectorFormat vform,
3578 LogicVRegister dst,
3579 const LogicVRegister& src1,
3580 const LogicVRegister& src2,
3581 int index);
3582 LogicVRegister mls(VectorFormat vform,
3583 LogicVRegister dst,
3584 const LogicVRegister& src1,
3585 const LogicVRegister& src2,
3586 int index);
3587 LogicVRegister pmul(VectorFormat vform,
3588 LogicVRegister dst,
3589 const LogicVRegister& src1,
3590 const LogicVRegister& src2);
3591 LogicVRegister sdiv(VectorFormat vform,
3592 LogicVRegister dst,
3593 const LogicVRegister& src1,
3594 const LogicVRegister& src2);
3595 LogicVRegister udiv(VectorFormat vform,
3596 LogicVRegister dst,
3597 const LogicVRegister& src1,
3598 const LogicVRegister& src2);
3599
3600 typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,
3601 LogicVRegister dst,
3602 const LogicVRegister& src1,
3603 const LogicVRegister& src2,
3604 int index);
3605 LogicVRegister fmul(VectorFormat vform,
3606 LogicVRegister dst,
3607 const LogicVRegister& src1,
3608 const LogicVRegister& src2,
3609 int index);
3610 LogicVRegister fmla(VectorFormat vform,
3611 LogicVRegister dst,
3612 const LogicVRegister& src1,
3613 const LogicVRegister& src2,
3614 int index);
3615 LogicVRegister fmlal(VectorFormat vform,
3616 LogicVRegister dst,
3617 const LogicVRegister& src1,
3618 const LogicVRegister& src2,
3619 int index);
3620 LogicVRegister fmlal2(VectorFormat vform,
3621 LogicVRegister dst,
3622 const LogicVRegister& src1,
3623 const LogicVRegister& src2,
3624 int index);
3625 LogicVRegister fmls(VectorFormat vform,
3626 LogicVRegister dst,
3627 const LogicVRegister& src1,
3628 const LogicVRegister& src2,
3629 int index);
3630 LogicVRegister fmlsl(VectorFormat vform,
3631 LogicVRegister dst,
3632 const LogicVRegister& src1,
3633 const LogicVRegister& src2,
3634 int index);
3635 LogicVRegister fmlsl2(VectorFormat vform,
3636 LogicVRegister dst,
3637 const LogicVRegister& src1,
3638 const LogicVRegister& src2,
3639 int index);
3640 LogicVRegister fmulx(VectorFormat vform,
3641 LogicVRegister dst,
3642 const LogicVRegister& src1,
3643 const LogicVRegister& src2,
3644 int index);
3645 LogicVRegister smulh(VectorFormat vform,
3646 LogicVRegister dst,
3647 const LogicVRegister& src1,
3648 const LogicVRegister& src2);
3649 LogicVRegister umulh(VectorFormat vform,
3650 LogicVRegister dst,
3651 const LogicVRegister& src1,
3652 const LogicVRegister& src2);
3653 LogicVRegister sqdmull(VectorFormat vform,
3654 LogicVRegister dst,
3655 const LogicVRegister& src1,
3656 const LogicVRegister& src2,
3657 int index);
3658 LogicVRegister sqdmlal(VectorFormat vform,
3659 LogicVRegister dst,
3660 const LogicVRegister& src1,
3661 const LogicVRegister& src2,
3662 int index);
3663 LogicVRegister sqdmlsl(VectorFormat vform,
3664 LogicVRegister dst,
3665 const LogicVRegister& src1,
3666 const LogicVRegister& src2,
3667 int index);
3668 LogicVRegister sqdmulh(VectorFormat vform,
3669 LogicVRegister dst,
3670 const LogicVRegister& src1,
3671 const LogicVRegister& src2,
3672 int index);
3673 LogicVRegister sqrdmulh(VectorFormat vform,
3674 LogicVRegister dst,
3675 const LogicVRegister& src1,
3676 const LogicVRegister& src2,
3677 int index);
3678 LogicVRegister sqrdmlah(VectorFormat vform,
3679 LogicVRegister dst,
3680 const LogicVRegister& src1,
3681 const LogicVRegister& src2,
3682 int index);
3683 LogicVRegister sqrdmlsh(VectorFormat vform,
3684 LogicVRegister dst,
3685 const LogicVRegister& src1,
3686 const LogicVRegister& src2,
3687 int index);
3688 LogicVRegister sub(VectorFormat vform,
3689 LogicVRegister dst,
3690 const LogicVRegister& src1,
3691 const LogicVRegister& src2);
3692 // Subtract `value` from each lane of `src1`, treating `value` as unsigned for
3693 // the purposes of setting the saturation flags.
3694 LogicVRegister sub_uint(VectorFormat vform,
3695 LogicVRegister dst,
3696 const LogicVRegister& src1,
3697 uint64_t value);
3698 LogicVRegister and_(VectorFormat vform,
3699 LogicVRegister dst,
3700 const LogicVRegister& src1,
3701 const LogicVRegister& src2);
3702 LogicVRegister orr(VectorFormat vform,
3703 LogicVRegister dst,
3704 const LogicVRegister& src1,
3705 const LogicVRegister& src2);
3706 LogicVRegister orn(VectorFormat vform,
3707 LogicVRegister dst,
3708 const LogicVRegister& src1,
3709 const LogicVRegister& src2);
3710 LogicVRegister eor(VectorFormat vform,
3711 LogicVRegister dst,
3712 const LogicVRegister& src1,
3713 const LogicVRegister& src2);
3714 LogicVRegister bic(VectorFormat vform,
3715 LogicVRegister dst,
3716 const LogicVRegister& src1,
3717 const LogicVRegister& src2);
3718 LogicVRegister bic(VectorFormat vform,
3719 LogicVRegister dst,
3720 const LogicVRegister& src,
3721 uint64_t imm);
3722 LogicVRegister bif(VectorFormat vform,
3723 LogicVRegister dst,
3724 const LogicVRegister& src1,
3725 const LogicVRegister& src2);
3726 LogicVRegister bit(VectorFormat vform,
3727 LogicVRegister dst,
3728 const LogicVRegister& src1,
3729 const LogicVRegister& src2);
3730 LogicVRegister bsl(VectorFormat vform,
3731 LogicVRegister dst,
3732 const LogicVRegister& src_mask,
3733 const LogicVRegister& src1,
3734 const LogicVRegister& src2);
3735 LogicVRegister cls(VectorFormat vform,
3736 LogicVRegister dst,
3737 const LogicVRegister& src);
3738 LogicVRegister clz(VectorFormat vform,
3739 LogicVRegister dst,
3740 const LogicVRegister& src);
3741 LogicVRegister cnot(VectorFormat vform,
3742 LogicVRegister dst,
3743 const LogicVRegister& src);
3744 LogicVRegister cnt(VectorFormat vform,
3745 LogicVRegister dst,
3746 const LogicVRegister& src);
3747 LogicVRegister not_(VectorFormat vform,
3748 LogicVRegister dst,
3749 const LogicVRegister& src);
3750 LogicVRegister rbit(VectorFormat vform,
3751 LogicVRegister dst,
3752 const LogicVRegister& src);
3753 LogicVRegister rev(VectorFormat vform,
3754 LogicVRegister dst,
3755 const LogicVRegister& src);
3756 LogicVRegister rev_byte(VectorFormat vform,
3757 LogicVRegister dst,
3758 const LogicVRegister& src,
3759 int rev_size);
3760 LogicVRegister rev16(VectorFormat vform,
3761 LogicVRegister dst,
3762 const LogicVRegister& src);
3763 LogicVRegister rev32(VectorFormat vform,
3764 LogicVRegister dst,
3765 const LogicVRegister& src);
3766 LogicVRegister rev64(VectorFormat vform,
3767 LogicVRegister dst,
3768 const LogicVRegister& src);
3769 LogicVRegister addlp(VectorFormat vform,
3770 LogicVRegister dst,
3771 const LogicVRegister& src,
3772 bool is_signed,
3773 bool do_accumulate);
3774 LogicVRegister saddlp(VectorFormat vform,
3775 LogicVRegister dst,
3776 const LogicVRegister& src);
3777 LogicVRegister uaddlp(VectorFormat vform,
3778 LogicVRegister dst,
3779 const LogicVRegister& src);
3780 LogicVRegister sadalp(VectorFormat vform,
3781 LogicVRegister dst,
3782 const LogicVRegister& src);
3783 LogicVRegister uadalp(VectorFormat vform,
3784 LogicVRegister dst,
3785 const LogicVRegister& src);
3786 LogicVRegister ror(VectorFormat vform,
3787 LogicVRegister dst,
3788 const LogicVRegister& src,
3789 int rotation);
3790 LogicVRegister rol(VectorFormat vform,
3791 LogicVRegister dst,
3792 const LogicVRegister& src,
3793 int rotation);
3794 LogicVRegister ext(VectorFormat vform,
3795 LogicVRegister dst,
3796 const LogicVRegister& src1,
3797 const LogicVRegister& src2,
3798 int index);
3799 LogicVRegister rotate_elements_right(VectorFormat vform,
3800 LogicVRegister dst,
3801 const LogicVRegister& src,
3802 int index);
3803 template <typename T>
3804 LogicVRegister fcadd(VectorFormat vform,
3805 LogicVRegister dst,
3806 const LogicVRegister& src1,
3807 const LogicVRegister& src2,
3808 int rot);
3809 LogicVRegister fcadd(VectorFormat vform,
3810 LogicVRegister dst,
3811 const LogicVRegister& src1,
3812 const LogicVRegister& src2,
3813 int rot);
3814 template <typename T>
3815 LogicVRegister fcmla(VectorFormat vform,
3816 LogicVRegister dst,
3817 const LogicVRegister& src1,
3818 const LogicVRegister& src2,
3819 const LogicVRegister& acc,
3820 int index,
3821 int rot);
3822 LogicVRegister fcmla(VectorFormat vform,
3823 LogicVRegister dst,
3824 const LogicVRegister& src1,
3825 const LogicVRegister& src2,
3826 int index,
3827 int rot);
3828 LogicVRegister fcmla(VectorFormat vform,
3829 LogicVRegister dst,
3830 const LogicVRegister& src1,
3831 const LogicVRegister& src2,
3832 const LogicVRegister& acc,
3833 int rot);
3834 template <typename T>
3835 LogicVRegister fadda(VectorFormat vform,
3836 LogicVRegister acc,
3837 const LogicPRegister& pg,
3838 const LogicVRegister& src);
3839 LogicVRegister fadda(VectorFormat vform,
3840 LogicVRegister acc,
3841 const LogicPRegister& pg,
3842 const LogicVRegister& src);
3843 LogicVRegister cadd(VectorFormat vform,
3844 LogicVRegister dst,
3845 const LogicVRegister& src1,
3846 const LogicVRegister& src2,
3847 int rot,
3848 bool saturate = false);
3849 LogicVRegister cmla(VectorFormat vform,
3850 LogicVRegister dst,
3851 const LogicVRegister& srca,
3852 const LogicVRegister& src1,
3853 const LogicVRegister& src2,
3854 int rot);
3855 LogicVRegister cmla(VectorFormat vform,
3856 LogicVRegister dst,
3857 const LogicVRegister& srca,
3858 const LogicVRegister& src1,
3859 const LogicVRegister& src2,
3860 int index,
3861 int rot);
3862 LogicVRegister bgrp(VectorFormat vform,
3863 LogicVRegister dst,
3864 const LogicVRegister& src1,
3865 const LogicVRegister& src2,
3866 bool do_bext = false);
3867 LogicVRegister bdep(VectorFormat vform,
3868 LogicVRegister dst,
3869 const LogicVRegister& src1,
3870 const LogicVRegister& src2);
3871 LogicVRegister histogram(VectorFormat vform,
3872 LogicVRegister dst,
3873 const LogicPRegister& pg,
3874 const LogicVRegister& src1,
3875 const LogicVRegister& src2,
3876 bool do_segmented = false);
3877 LogicVRegister index(VectorFormat vform,
3878 LogicVRegister dst,
3879 uint64_t start,
3880 uint64_t step);
3881 LogicVRegister ins_element(VectorFormat vform,
3882 LogicVRegister dst,
3883 int dst_index,
3884 const LogicVRegister& src,
3885 int src_index);
3886 LogicVRegister ins_immediate(VectorFormat vform,
3887 LogicVRegister dst,
3888 int dst_index,
3889 uint64_t imm);
3890 LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm);
3891 LogicVRegister dup_element(VectorFormat vform,
3892 LogicVRegister dst,
3893 const LogicVRegister& src,
3894 int src_index);
3895 LogicVRegister dup_elements_to_segments(VectorFormat vform,
3896 LogicVRegister dst,
3897 const LogicVRegister& src,
3898 int src_index);
3899 LogicVRegister dup_elements_to_segments(
3900 VectorFormat vform,
3901 LogicVRegister dst,
3902 const std::pair<int, int>& src_and_index);
3903 LogicVRegister dup_immediate(VectorFormat vform,
3904 LogicVRegister dst,
3905 uint64_t imm);
3906 LogicVRegister mov(VectorFormat vform,
3907 LogicVRegister dst,
3908 const LogicVRegister& src);
3909 LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src);
3910 LogicVRegister mov_merging(VectorFormat vform,
3911 LogicVRegister dst,
3912 const SimPRegister& pg,
3913 const LogicVRegister& src);
3914 LogicVRegister mov_zeroing(VectorFormat vform,
3915 LogicVRegister dst,
3916 const SimPRegister& pg,
3917 const LogicVRegister& src);
3918 LogicVRegister mov_alternating(VectorFormat vform,
3919 LogicVRegister dst,
3920 const LogicVRegister& src,
3921 int start_at);
3922 LogicPRegister mov_merging(LogicPRegister dst,
3923 const LogicPRegister& pg,
3924 const LogicPRegister& src);
3925 LogicPRegister mov_zeroing(LogicPRegister dst,
3926 const LogicPRegister& pg,
3927 const LogicPRegister& src);
3928 LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);
3929 LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);
3930 LogicVRegister orr(VectorFormat vform,
3931 LogicVRegister dst,
3932 const LogicVRegister& src,
3933 uint64_t imm);
3934 LogicVRegister sshl(VectorFormat vform,
3935 LogicVRegister dst,
3936 const LogicVRegister& src1,
3937 const LogicVRegister& src2,
3938 bool shift_is_8bit = true);
3939 LogicVRegister ushl(VectorFormat vform,
3940 LogicVRegister dst,
3941 const LogicVRegister& src1,
3942 const LogicVRegister& src2,
3943 bool shift_is_8bit = true);
3944 LogicVRegister sshr(VectorFormat vform,
3945 LogicVRegister dst,
3946 const LogicVRegister& src1,
3947 const LogicVRegister& src2);
3948 LogicVRegister ushr(VectorFormat vform,
3949 LogicVRegister dst,
3950 const LogicVRegister& src1,
3951 const LogicVRegister& src2);
3952 // Perform a "conditional last" operation. The first part of the pair is true
3953 // if any predicate lane is active, false otherwise. The second part takes the
3954 // value of the last active (plus offset) lane, or last (plus offset) lane if
3955 // none active.
3956 std::pair<bool, uint64_t> clast(VectorFormat vform,
3957 const LogicPRegister& pg,
3958 const LogicVRegister& src2,
3959 int offset_from_last_active);
3960 LogicPRegister match(VectorFormat vform,
3961 LogicPRegister dst,
3962 const LogicVRegister& haystack,
3963 const LogicVRegister& needles,
3964 bool negate_match);
3965 LogicVRegister compact(VectorFormat vform,
3966 LogicVRegister dst,
3967 const LogicPRegister& pg,
3968 const LogicVRegister& src);
3969 LogicVRegister splice(VectorFormat vform,
3970 LogicVRegister dst,
3971 const LogicPRegister& pg,
3972 const LogicVRegister& src1,
3973 const LogicVRegister& src2);
3974 LogicVRegister sel(VectorFormat vform,
3975 LogicVRegister dst,
3976 const SimPRegister& pg,
3977 const LogicVRegister& src1,
3978 const LogicVRegister& src2);
3979 LogicPRegister sel(LogicPRegister dst,
3980 const LogicPRegister& pg,
3981 const LogicPRegister& src1,
3982 const LogicPRegister& src2);
3983 LogicVRegister sminmax(VectorFormat vform,
3984 LogicVRegister dst,
3985 const LogicVRegister& src1,
3986 const LogicVRegister& src2,
3987 bool max);
3988 LogicVRegister smax(VectorFormat vform,
3989 LogicVRegister dst,
3990 const LogicVRegister& src1,
3991 const LogicVRegister& src2);
3992 LogicVRegister smin(VectorFormat vform,
3993 LogicVRegister dst,
3994 const LogicVRegister& src1,
3995 const LogicVRegister& src2);
3996 LogicVRegister sminmaxp(VectorFormat vform,
3997 LogicVRegister dst,
3998 const LogicVRegister& src1,
3999 const LogicVRegister& src2,
4000 bool max);
4001 LogicVRegister smaxp(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2);
4005 LogicVRegister sminp(VectorFormat vform,
4006 LogicVRegister dst,
4007 const LogicVRegister& src1,
4008 const LogicVRegister& src2);
4009 LogicVRegister addp(VectorFormat vform,
4010 LogicVRegister dst,
4011 const LogicVRegister& src);
4012 LogicVRegister addv(VectorFormat vform,
4013 LogicVRegister dst,
4014 const LogicVRegister& src);
4015 LogicVRegister uaddlv(VectorFormat vform,
4016 LogicVRegister dst,
4017 const LogicVRegister& src);
4018 LogicVRegister saddlv(VectorFormat vform,
4019 LogicVRegister dst,
4020 const LogicVRegister& src);
4021 LogicVRegister sminmaxv(VectorFormat vform,
4022 LogicVRegister dst,
4023 const LogicPRegister& pg,
4024 const LogicVRegister& src,
4025 bool max);
4026 LogicVRegister smaxv(VectorFormat vform,
4027 LogicVRegister dst,
4028 const LogicVRegister& src);
4029 LogicVRegister sminv(VectorFormat vform,
4030 LogicVRegister dst,
4031 const LogicVRegister& src);
4032 LogicVRegister uxtl(VectorFormat vform,
4033 LogicVRegister dst,
4034 const LogicVRegister& src,
4035 bool is_2 = false);
4036 LogicVRegister uxtl2(VectorFormat vform,
4037 LogicVRegister dst,
4038 const LogicVRegister& src);
4039 LogicVRegister sxtl(VectorFormat vform,
4040 LogicVRegister dst,
4041 const LogicVRegister& src,
4042 bool is_2 = false);
4043 LogicVRegister sxtl2(VectorFormat vform,
4044 LogicVRegister dst,
4045 const LogicVRegister& src);
4046 LogicVRegister uxt(VectorFormat vform,
4047 LogicVRegister dst,
4048 const LogicVRegister& src,
4049 unsigned from_size_in_bits);
4050 LogicVRegister sxt(VectorFormat vform,
4051 LogicVRegister dst,
4052 const LogicVRegister& src,
4053 unsigned from_size_in_bits);
4054 LogicVRegister tbl(VectorFormat vform,
4055 LogicVRegister dst,
4056 const LogicVRegister& tab,
4057 const LogicVRegister& ind);
4058 LogicVRegister tbl(VectorFormat vform,
4059 LogicVRegister dst,
4060 const LogicVRegister& tab,
4061 const LogicVRegister& tab2,
4062 const LogicVRegister& ind);
4063 LogicVRegister tbl(VectorFormat vform,
4064 LogicVRegister dst,
4065 const LogicVRegister& tab,
4066 const LogicVRegister& tab2,
4067 const LogicVRegister& tab3,
4068 const LogicVRegister& ind);
4069 LogicVRegister tbl(VectorFormat vform,
4070 LogicVRegister dst,
4071 const LogicVRegister& tab,
4072 const LogicVRegister& tab2,
4073 const LogicVRegister& tab3,
4074 const LogicVRegister& tab4,
4075 const LogicVRegister& ind);
4076 LogicVRegister Table(VectorFormat vform,
4077 LogicVRegister dst,
4078 const LogicVRegister& ind,
4079 bool zero_out_of_bounds,
4080 const LogicVRegister* tab1,
4081 const LogicVRegister* tab2 = NULL,
4082 const LogicVRegister* tab3 = NULL,
4083 const LogicVRegister* tab4 = NULL);
4084 LogicVRegister tbx(VectorFormat vform,
4085 LogicVRegister dst,
4086 const LogicVRegister& tab,
4087 const LogicVRegister& ind);
4088 LogicVRegister tbx(VectorFormat vform,
4089 LogicVRegister dst,
4090 const LogicVRegister& tab,
4091 const LogicVRegister& tab2,
4092 const LogicVRegister& ind);
4093 LogicVRegister tbx(VectorFormat vform,
4094 LogicVRegister dst,
4095 const LogicVRegister& tab,
4096 const LogicVRegister& tab2,
4097 const LogicVRegister& tab3,
4098 const LogicVRegister& ind);
4099 LogicVRegister tbx(VectorFormat vform,
4100 LogicVRegister dst,
4101 const LogicVRegister& tab,
4102 const LogicVRegister& tab2,
4103 const LogicVRegister& tab3,
4104 const LogicVRegister& tab4,
4105 const LogicVRegister& ind);
4106 LogicVRegister uaddl(VectorFormat vform,
4107 LogicVRegister dst,
4108 const LogicVRegister& src1,
4109 const LogicVRegister& src2);
4110 LogicVRegister uaddl2(VectorFormat vform,
4111 LogicVRegister dst,
4112 const LogicVRegister& src1,
4113 const LogicVRegister& src2);
4114 LogicVRegister uaddw(VectorFormat vform,
4115 LogicVRegister dst,
4116 const LogicVRegister& src1,
4117 const LogicVRegister& src2);
4118 LogicVRegister uaddw2(VectorFormat vform,
4119 LogicVRegister dst,
4120 const LogicVRegister& src1,
4121 const LogicVRegister& src2);
4122 LogicVRegister saddl(VectorFormat vform,
4123 LogicVRegister dst,
4124 const LogicVRegister& src1,
4125 const LogicVRegister& src2);
4126 LogicVRegister saddl2(VectorFormat vform,
4127 LogicVRegister dst,
4128 const LogicVRegister& src1,
4129 const LogicVRegister& src2);
4130 LogicVRegister saddw(VectorFormat vform,
4131 LogicVRegister dst,
4132 const LogicVRegister& src1,
4133 const LogicVRegister& src2);
4134 LogicVRegister saddw2(VectorFormat vform,
4135 LogicVRegister dst,
4136 const LogicVRegister& src1,
4137 const LogicVRegister& src2);
4138 LogicVRegister usubl(VectorFormat vform,
4139 LogicVRegister dst,
4140 const LogicVRegister& src1,
4141 const LogicVRegister& src2);
4142 LogicVRegister usubl2(VectorFormat vform,
4143 LogicVRegister dst,
4144 const LogicVRegister& src1,
4145 const LogicVRegister& src2);
4146 LogicVRegister usubw(VectorFormat vform,
4147 LogicVRegister dst,
4148 const LogicVRegister& src1,
4149 const LogicVRegister& src2);
4150 LogicVRegister usubw2(VectorFormat vform,
4151 LogicVRegister dst,
4152 const LogicVRegister& src1,
4153 const LogicVRegister& src2);
4154 LogicVRegister ssubl(VectorFormat vform,
4155 LogicVRegister dst,
4156 const LogicVRegister& src1,
4157 const LogicVRegister& src2);
4158 LogicVRegister ssubl2(VectorFormat vform,
4159 LogicVRegister dst,
4160 const LogicVRegister& src1,
4161 const LogicVRegister& src2);
4162 LogicVRegister ssubw(VectorFormat vform,
4163 LogicVRegister dst,
4164 const LogicVRegister& src1,
4165 const LogicVRegister& src2);
4166 LogicVRegister ssubw2(VectorFormat vform,
4167 LogicVRegister dst,
4168 const LogicVRegister& src1,
4169 const LogicVRegister& src2);
4170 LogicVRegister uminmax(VectorFormat vform,
4171 LogicVRegister dst,
4172 const LogicVRegister& src1,
4173 const LogicVRegister& src2,
4174 bool max);
4175 LogicVRegister umax(VectorFormat vform,
4176 LogicVRegister dst,
4177 const LogicVRegister& src1,
4178 const LogicVRegister& src2);
4179 LogicVRegister umin(VectorFormat vform,
4180 LogicVRegister dst,
4181 const LogicVRegister& src1,
4182 const LogicVRegister& src2);
4183 LogicVRegister uminmaxp(VectorFormat vform,
4184 LogicVRegister dst,
4185 const LogicVRegister& src1,
4186 const LogicVRegister& src2,
4187 bool max);
4188 LogicVRegister umaxp(VectorFormat vform,
4189 LogicVRegister dst,
4190 const LogicVRegister& src1,
4191 const LogicVRegister& src2);
4192 LogicVRegister uminp(VectorFormat vform,
4193 LogicVRegister dst,
4194 const LogicVRegister& src1,
4195 const LogicVRegister& src2);
4196 LogicVRegister uminmaxv(VectorFormat vform,
4197 LogicVRegister dst,
4198 const LogicPRegister& pg,
4199 const LogicVRegister& src,
4200 bool max);
4201 LogicVRegister umaxv(VectorFormat vform,
4202 LogicVRegister dst,
4203 const LogicVRegister& src);
4204 LogicVRegister uminv(VectorFormat vform,
4205 LogicVRegister dst,
4206 const LogicVRegister& src);
4207 LogicVRegister trn1(VectorFormat vform,
4208 LogicVRegister dst,
4209 const LogicVRegister& src1,
4210 const LogicVRegister& src2);
4211 LogicVRegister trn2(VectorFormat vform,
4212 LogicVRegister dst,
4213 const LogicVRegister& src1,
4214 const LogicVRegister& src2);
4215 LogicVRegister zip1(VectorFormat vform,
4216 LogicVRegister dst,
4217 const LogicVRegister& src1,
4218 const LogicVRegister& src2);
4219 LogicVRegister zip2(VectorFormat vform,
4220 LogicVRegister dst,
4221 const LogicVRegister& src1,
4222 const LogicVRegister& src2);
4223 LogicVRegister uzp1(VectorFormat vform,
4224 LogicVRegister dst,
4225 const LogicVRegister& src1,
4226 const LogicVRegister& src2);
4227 LogicVRegister uzp2(VectorFormat vform,
4228 LogicVRegister dst,
4229 const LogicVRegister& src1,
4230 const LogicVRegister& src2);
4231 LogicVRegister shl(VectorFormat vform,
4232 LogicVRegister dst,
4233 const LogicVRegister& src,
4234 int shift);
4235 LogicVRegister scvtf(VectorFormat vform,
4236 unsigned dst_data_size_in_bits,
4237 unsigned src_data_size_in_bits,
4238 LogicVRegister dst,
4239 const LogicPRegister& pg,
4240 const LogicVRegister& src,
4241 FPRounding round,
4242 int fbits = 0);
4243 LogicVRegister scvtf(VectorFormat vform,
4244 LogicVRegister dst,
4245 const LogicVRegister& src,
4246 int fbits,
4247 FPRounding rounding_mode);
4248 LogicVRegister ucvtf(VectorFormat vform,
4249 unsigned dst_data_size,
4250 unsigned src_data_size,
4251 LogicVRegister dst,
4252 const LogicPRegister& pg,
4253 const LogicVRegister& src,
4254 FPRounding round,
4255 int fbits = 0);
4256 LogicVRegister ucvtf(VectorFormat vform,
4257 LogicVRegister dst,
4258 const LogicVRegister& src,
4259 int fbits,
4260 FPRounding rounding_mode);
4261 LogicVRegister sshll(VectorFormat vform,
4262 LogicVRegister dst,
4263 const LogicVRegister& src,
4264 int shift);
4265 LogicVRegister sshll2(VectorFormat vform,
4266 LogicVRegister dst,
4267 const LogicVRegister& src,
4268 int shift);
4269 LogicVRegister shll(VectorFormat vform,
4270 LogicVRegister dst,
4271 const LogicVRegister& src);
4272 LogicVRegister shll2(VectorFormat vform,
4273 LogicVRegister dst,
4274 const LogicVRegister& src);
4275 LogicVRegister ushll(VectorFormat vform,
4276 LogicVRegister dst,
4277 const LogicVRegister& src,
4278 int shift);
4279 LogicVRegister ushll2(VectorFormat vform,
4280 LogicVRegister dst,
4281 const LogicVRegister& src,
4282 int shift);
4283 LogicVRegister sli(VectorFormat vform,
4284 LogicVRegister dst,
4285 const LogicVRegister& src,
4286 int shift);
4287 LogicVRegister sri(VectorFormat vform,
4288 LogicVRegister dst,
4289 const LogicVRegister& src,
4290 int shift);
4291 LogicVRegister sshr(VectorFormat vform,
4292 LogicVRegister dst,
4293 const LogicVRegister& src,
4294 int shift);
4295 LogicVRegister ushr(VectorFormat vform,
4296 LogicVRegister dst,
4297 const LogicVRegister& src,
4298 int shift);
4299 LogicVRegister ssra(VectorFormat vform,
4300 LogicVRegister dst,
4301 const LogicVRegister& src,
4302 int shift);
4303 LogicVRegister usra(VectorFormat vform,
4304 LogicVRegister dst,
4305 const LogicVRegister& src,
4306 int shift);
4307 LogicVRegister srsra(VectorFormat vform,
4308 LogicVRegister dst,
4309 const LogicVRegister& src,
4310 int shift);
4311 LogicVRegister ursra(VectorFormat vform,
4312 LogicVRegister dst,
4313 const LogicVRegister& src,
4314 int shift);
4315 LogicVRegister suqadd(VectorFormat vform,
4316 LogicVRegister dst,
4317 const LogicVRegister& src1,
4318 const LogicVRegister& src2);
4319 LogicVRegister usqadd(VectorFormat vform,
4320 LogicVRegister dst,
4321 const LogicVRegister& src1,
4322 const LogicVRegister& src2);
4323 LogicVRegister sqshl(VectorFormat vform,
4324 LogicVRegister dst,
4325 const LogicVRegister& src,
4326 int shift);
4327 LogicVRegister uqshl(VectorFormat vform,
4328 LogicVRegister dst,
4329 const LogicVRegister& src,
4330 int shift);
4331 LogicVRegister sqshlu(VectorFormat vform,
4332 LogicVRegister dst,
4333 const LogicVRegister& src,
4334 int shift);
4335 LogicVRegister abs(VectorFormat vform,
4336 LogicVRegister dst,
4337 const LogicVRegister& src);
4338 LogicVRegister neg(VectorFormat vform,
4339 LogicVRegister dst,
4340 const LogicVRegister& src);
4341 LogicVRegister extractnarrow(VectorFormat vform,
4342 LogicVRegister dst,
4343 bool dst_is_signed,
4344 const LogicVRegister& src,
4345 bool src_is_signed);
4346 LogicVRegister xtn(VectorFormat vform,
4347 LogicVRegister dst,
4348 const LogicVRegister& src);
4349 LogicVRegister sqxtn(VectorFormat vform,
4350 LogicVRegister dst,
4351 const LogicVRegister& src);
4352 LogicVRegister uqxtn(VectorFormat vform,
4353 LogicVRegister dst,
4354 const LogicVRegister& src);
4355 LogicVRegister sqxtun(VectorFormat vform,
4356 LogicVRegister dst,
4357 const LogicVRegister& src);
4358 LogicVRegister absdiff(VectorFormat vform,
4359 LogicVRegister dst,
4360 const LogicVRegister& src1,
4361 const LogicVRegister& src2,
4362 bool is_signed);
4363 LogicVRegister saba(VectorFormat vform,
4364 LogicVRegister dst,
4365 const LogicVRegister& src1,
4366 const LogicVRegister& src2);
4367 LogicVRegister uaba(VectorFormat vform,
4368 LogicVRegister dst,
4369 const LogicVRegister& src1,
4370 const LogicVRegister& src2);
4371 LogicVRegister shrn(VectorFormat vform,
4372 LogicVRegister dst,
4373 const LogicVRegister& src,
4374 int shift);
4375 LogicVRegister shrn2(VectorFormat vform,
4376 LogicVRegister dst,
4377 const LogicVRegister& src,
4378 int shift);
4379 LogicVRegister rshrn(VectorFormat vform,
4380 LogicVRegister dst,
4381 const LogicVRegister& src,
4382 int shift);
4383 LogicVRegister rshrn2(VectorFormat vform,
4384 LogicVRegister dst,
4385 const LogicVRegister& src,
4386 int shift);
4387 LogicVRegister uqshrn(VectorFormat vform,
4388 LogicVRegister dst,
4389 const LogicVRegister& src,
4390 int shift);
4391 LogicVRegister uqshrn2(VectorFormat vform,
4392 LogicVRegister dst,
4393 const LogicVRegister& src,
4394 int shift);
4395 LogicVRegister uqrshrn(VectorFormat vform,
4396 LogicVRegister dst,
4397 const LogicVRegister& src,
4398 int shift);
4399 LogicVRegister uqrshrn2(VectorFormat vform,
4400 LogicVRegister dst,
4401 const LogicVRegister& src,
4402 int shift);
4403 LogicVRegister sqshrn(VectorFormat vform,
4404 LogicVRegister dst,
4405 const LogicVRegister& src,
4406 int shift);
4407 LogicVRegister sqshrn2(VectorFormat vform,
4408 LogicVRegister dst,
4409 const LogicVRegister& src,
4410 int shift);
4411 LogicVRegister sqrshrn(VectorFormat vform,
4412 LogicVRegister dst,
4413 const LogicVRegister& src,
4414 int shift);
4415 LogicVRegister sqrshrn2(VectorFormat vform,
4416 LogicVRegister dst,
4417 const LogicVRegister& src,
4418 int shift);
4419 LogicVRegister sqshrun(VectorFormat vform,
4420 LogicVRegister dst,
4421 const LogicVRegister& src,
4422 int shift);
4423 LogicVRegister sqshrun2(VectorFormat vform,
4424 LogicVRegister dst,
4425 const LogicVRegister& src,
4426 int shift);
4427 LogicVRegister sqrshrun(VectorFormat vform,
4428 LogicVRegister dst,
4429 const LogicVRegister& src,
4430 int shift);
4431 LogicVRegister sqrshrun2(VectorFormat vform,
4432 LogicVRegister dst,
4433 const LogicVRegister& src,
4434 int shift);
4435 LogicVRegister sqrdmulh(VectorFormat vform,
4436 LogicVRegister dst,
4437 const LogicVRegister& src1,
4438 const LogicVRegister& src2,
4439 bool round = true);
4440 LogicVRegister dot(VectorFormat vform,
4441 LogicVRegister dst,
4442 const LogicVRegister& src1,
4443 const LogicVRegister& src2,
4444 bool is_src1_signed,
4445 bool is_src2_signed);
4446 LogicVRegister sdot(VectorFormat vform,
4447 LogicVRegister dst,
4448 const LogicVRegister& src1,
4449 const LogicVRegister& src2);
4450 LogicVRegister udot(VectorFormat vform,
4451 LogicVRegister dst,
4452 const LogicVRegister& src1,
4453 const LogicVRegister& src2);
4454 LogicVRegister usdot(VectorFormat vform,
4455 LogicVRegister dst,
4456 const LogicVRegister& src1,
4457 const LogicVRegister& src2);
4458 LogicVRegister cdot(VectorFormat vform,
4459 LogicVRegister dst,
4460 const LogicVRegister& acc,
4461 const LogicVRegister& src1,
4462 const LogicVRegister& src2,
4463 int rot);
4464 LogicVRegister sqrdcmlah(VectorFormat vform,
4465 LogicVRegister dst,
4466 const LogicVRegister& srca,
4467 const LogicVRegister& src1,
4468 const LogicVRegister& src2,
4469 int rot);
4470 LogicVRegister sqrdcmlah(VectorFormat vform,
4471 LogicVRegister dst,
4472 const LogicVRegister& srca,
4473 const LogicVRegister& src1,
4474 const LogicVRegister& src2,
4475 int index,
4476 int rot);
4477 LogicVRegister sqrdmlash(VectorFormat vform,
4478 LogicVRegister dst,
4479 const LogicVRegister& src1,
4480 const LogicVRegister& src2,
4481 bool round = true,
4482 bool sub_op = false);
4483 LogicVRegister sqrdmlash_d(VectorFormat vform,
4484 LogicVRegister dst,
4485 const LogicVRegister& src1,
4486 const LogicVRegister& src2,
4487 bool round = true,
4488 bool sub_op = false);
4489 LogicVRegister sqrdmlah(VectorFormat vform,
4490 LogicVRegister dst,
4491 const LogicVRegister& src1,
4492 const LogicVRegister& src2,
4493 bool round = true);
4494 LogicVRegister sqrdmlsh(VectorFormat vform,
4495 LogicVRegister dst,
4496 const LogicVRegister& src1,
4497 const LogicVRegister& src2,
4498 bool round = true);
4499 LogicVRegister sqdmulh(VectorFormat vform,
4500 LogicVRegister dst,
4501 const LogicVRegister& src1,
4502 const LogicVRegister& src2);
4503 LogicVRegister matmul(VectorFormat vform_dst,
4504 LogicVRegister dst,
4505 const LogicVRegister& src1,
4506 const LogicVRegister& src2,
4507 bool src1_signed,
4508 bool src2_signed);
4509 template <typename T>
4510 LogicVRegister fmatmul(VectorFormat vform,
4511 LogicVRegister srcdst,
4512 const LogicVRegister& src1,
4513 const LogicVRegister& src2);
4514 LogicVRegister fmatmul(VectorFormat vform,
4515 LogicVRegister srcdst,
4516 const LogicVRegister& src1,
4517 const LogicVRegister& src2);
4518
4519 template <unsigned N>
4520 static void SHARotateEltsLeftOne(uint64_t (&x)[N]) {
4521 VIXL_STATIC_ASSERT(N == 4);
4522 uint64_t temp = x[3];
4523 x[3] = x[2];
4524 x[2] = x[1];
4525 x[1] = x[0];
4526 x[0] = temp;
4527 }
4528
  // Simulate a NEON SHA-1 hash-update instruction: perform four SHA-1 rounds
  // on the four 32-bit hash values held in `srcdst`, with the extra working
  // value taken from lane 0 of `src1` and the four 32-bit schedule words
  // taken from `src2`. The round function is selected by `mode` via
  // CryptoOp<mode>.
  template <uint32_t mode>
  LogicVRegister sha1(LogicVRegister srcdst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2) {
    // `y` is the fifth working variable; `sd` holds the other four.
    uint64_t y = src1.Uint(kFormat4S, 0);
    uint64_t sd[4] = {};
    srcdst.UintArray(kFormat4S, sd);

    for (unsigned i = 0; i < ArrayLength(sd); i++) {
      // t = f(sd[1], sd[2], sd[3]), where f is the mode-selected SHA-1 round
      // function (see CryptoOp).
      uint64_t t = CryptoOp<mode>(sd[1], sd[2], sd[3]);

      // y += ROL(a, 5) + f(b, c, d) + w[i], the standard SHA-1 round update.
      y += RotateLeft(sd[0], 5, kSRegSize) + t;
      y += src2.Uint(kFormat4S, i);

      sd[1] = RotateLeft(sd[1], 30, kSRegSize);

      // y:sd = ROL(y:sd, 32)
      SHARotateEltsLeftOne(sd);
      std::swap(sd[0], y);
    }

    srcdst.SetUintArray(kFormat4S, sd);
    return srcdst;
  }
4553
4554 LogicVRegister sha2h(LogicVRegister srcdst,
4555 const LogicVRegister& src1,
4556 const LogicVRegister& src2,
4557 bool part1);
4558 LogicVRegister sha2su0(LogicVRegister srcdst, const LogicVRegister& src1);
4559 LogicVRegister sha2su1(LogicVRegister srcdst,
4560 const LogicVRegister& src1,
4561 const LogicVRegister& src2);
4562 LogicVRegister sha512h(LogicVRegister srcdst,
4563 const LogicVRegister& src1,
4564 const LogicVRegister& src2);
4565 LogicVRegister sha512h2(LogicVRegister srcdst,
4566 const LogicVRegister& src1,
4567 const LogicVRegister& src2);
4568 LogicVRegister sha512su0(LogicVRegister srcdst, const LogicVRegister& src1);
4569 LogicVRegister sha512su1(LogicVRegister srcdst,
4570 const LogicVRegister& src1,
4571 const LogicVRegister& src2);
4572
4573
4574 LogicVRegister aes(LogicVRegister srcdst,
4575 const LogicVRegister& src1,
4576 bool decrypt);
4577 LogicVRegister aesmix(LogicVRegister srcdst,
4578 const LogicVRegister& src1,
4579 bool inverse);
4580
4581 LogicVRegister sm3partw1(LogicVRegister dst,
4582 const LogicVRegister& src1,
4583 const LogicVRegister& src2);
4584 LogicVRegister sm3partw2(LogicVRegister dst,
4585 const LogicVRegister& src1,
4586 const LogicVRegister& src2);
4587 LogicVRegister sm3ss1(LogicVRegister dst,
4588 const LogicVRegister& src1,
4589 const LogicVRegister& src2,
4590 const LogicVRegister& src3);
4591 LogicVRegister sm3tt1(LogicVRegister srcdst,
4592 const LogicVRegister& src1,
4593 const LogicVRegister& src2,
4594 int index,
4595 bool is_a);
4596 LogicVRegister sm3tt2(LogicVRegister srcdst,
4597 const LogicVRegister& src1,
4598 const LogicVRegister& src2,
4599 int index,
4600 bool is_a);
4601
4602 LogicVRegister sm4(LogicVRegister dst,
4603 const LogicVRegister& src1,
4604 const LogicVRegister& src2,
4605 bool is_key);
4606
4607 #define NEON_3VREG_LOGIC_LIST(V) \
4608 V(addhn) \
4609 V(addhn2) \
4610 V(raddhn) \
4611 V(raddhn2) \
4612 V(subhn) \
4613 V(subhn2) \
4614 V(rsubhn) \
4615 V(rsubhn2) \
4616 V(pmull) \
4617 V(pmull2) \
4618 V(sabal) \
4619 V(sabal2) \
4620 V(uabal) \
4621 V(uabal2) \
4622 V(sabdl) \
4623 V(sabdl2) \
4624 V(uabdl) \
4625 V(uabdl2) \
4626 V(smull2) \
4627 V(umull2) \
4628 V(smlal2) \
4629 V(umlal2) \
4630 V(smlsl2) \
4631 V(umlsl2) \
4632 V(sqdmlal2) \
4633 V(sqdmlsl2) \
4634 V(sqdmull2)
4635
4636 #define DEFINE_LOGIC_FUNC(FXN) \
4637 LogicVRegister FXN(VectorFormat vform, \
4638 LogicVRegister dst, \
4639 const LogicVRegister& src1, \
4640 const LogicVRegister& src2);
4641 NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
4642 #undef DEFINE_LOGIC_FUNC
4643
4644 #define NEON_MULL_LIST(V) \
4645 V(smull) \
4646 V(umull) \
4647 V(smlal) \
4648 V(umlal) \
4649 V(smlsl) \
4650 V(umlsl) \
4651 V(sqdmlal) \
4652 V(sqdmlsl) \
4653 V(sqdmull)
4654
4655 #define DECLARE_NEON_MULL_OP(FN) \
4656 LogicVRegister FN(VectorFormat vform, \
4657 LogicVRegister dst, \
4658 const LogicVRegister& src1, \
4659 const LogicVRegister& src2, \
4660 bool is_2 = false);
4661 NEON_MULL_LIST(DECLARE_NEON_MULL_OP)
4662 #undef DECLARE_NEON_MULL_OP
4663
4664 #define NEON_FP3SAME_LIST(V) \
4665 V(fadd, FPAdd, false) \
4666 V(fsub, FPSub, true) \
4667 V(fmul, FPMul, true) \
4668 V(fmulx, FPMulx, true) \
4669 V(fdiv, FPDiv, true) \
4670 V(fmax, FPMax, false) \
4671 V(fmin, FPMin, false) \
4672 V(fmaxnm, FPMaxNM, false) \
4673 V(fminnm, FPMinNM, false)
4674
4675 #define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
4676 template <typename T> \
4677 LogicVRegister FN(VectorFormat vform, \
4678 LogicVRegister dst, \
4679 const LogicVRegister& src1, \
4680 const LogicVRegister& src2); \
4681 LogicVRegister FN(VectorFormat vform, \
4682 LogicVRegister dst, \
4683 const LogicVRegister& src1, \
4684 const LogicVRegister& src2);
4685 NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
4686 #undef DECLARE_NEON_FP_VECTOR_OP
4687
4688 #define NEON_FPPAIRWISE_LIST(V) \
4689 V(faddp, fadd, FPAdd) \
4690 V(fmaxp, fmax, FPMax) \
4691 V(fmaxnmp, fmaxnm, FPMaxNM) \
4692 V(fminp, fmin, FPMin) \
4693 V(fminnmp, fminnm, FPMinNM)
4694
4695 #define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
4696 LogicVRegister FNP(VectorFormat vform, \
4697 LogicVRegister dst, \
4698 const LogicVRegister& src1, \
4699 const LogicVRegister& src2); \
4700 LogicVRegister FNP(VectorFormat vform, \
4701 LogicVRegister dst, \
4702 const LogicVRegister& src);
4703 NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
4704 #undef DECLARE_NEON_FP_PAIR_OP
4705
4706 enum FrintMode {
4707 kFrintToInteger = 0,
4708 kFrintToInt32 = 32,
4709 kFrintToInt64 = 64
4710 };
4711
4712 template <typename T>
4713 LogicVRegister frecps(VectorFormat vform,
4714 LogicVRegister dst,
4715 const LogicVRegister& src1,
4716 const LogicVRegister& src2);
4717 LogicVRegister frecps(VectorFormat vform,
4718 LogicVRegister dst,
4719 const LogicVRegister& src1,
4720 const LogicVRegister& src2);
4721 template <typename T>
4722 LogicVRegister frsqrts(VectorFormat vform,
4723 LogicVRegister dst,
4724 const LogicVRegister& src1,
4725 const LogicVRegister& src2);
4726 LogicVRegister frsqrts(VectorFormat vform,
4727 LogicVRegister dst,
4728 const LogicVRegister& src1,
4729 const LogicVRegister& src2);
4730 template <typename T>
4731 LogicVRegister fmla(VectorFormat vform,
4732 LogicVRegister dst,
4733 const LogicVRegister& srca,
4734 const LogicVRegister& src1,
4735 const LogicVRegister& src2);
4736 LogicVRegister fmla(VectorFormat vform,
4737 LogicVRegister dst,
4738 const LogicVRegister& srca,
4739 const LogicVRegister& src1,
4740 const LogicVRegister& src2);
4741 template <typename T>
4742 LogicVRegister fmls(VectorFormat vform,
4743 LogicVRegister dst,
4744 const LogicVRegister& srca,
4745 const LogicVRegister& src1,
4746 const LogicVRegister& src2);
4747 LogicVRegister fmls(VectorFormat vform,
4748 LogicVRegister dst,
4749 const LogicVRegister& srca,
4750 const LogicVRegister& src1,
4751 const LogicVRegister& src2);
4752 LogicVRegister fnmul(VectorFormat vform,
4753 LogicVRegister dst,
4754 const LogicVRegister& src1,
4755 const LogicVRegister& src2);
4756
4757 LogicVRegister fmlal(VectorFormat vform,
4758 LogicVRegister dst,
4759 const LogicVRegister& src1,
4760 const LogicVRegister& src2);
4761 LogicVRegister fmlal2(VectorFormat vform,
4762 LogicVRegister dst,
4763 const LogicVRegister& src1,
4764 const LogicVRegister& src2);
4765 LogicVRegister fmlsl(VectorFormat vform,
4766 LogicVRegister dst,
4767 const LogicVRegister& src1,
4768 const LogicVRegister& src2);
4769 LogicVRegister fmlsl2(VectorFormat vform,
4770 LogicVRegister dst,
4771 const LogicVRegister& src1,
4772 const LogicVRegister& src2);
4773
4774 template <typename T>
4775 LogicVRegister fcmp(VectorFormat vform,
4776 LogicVRegister dst,
4777 const LogicVRegister& src1,
4778 const LogicVRegister& src2,
4779 Condition cond);
4780 LogicVRegister fcmp(VectorFormat vform,
4781 LogicVRegister dst,
4782 const LogicVRegister& src1,
4783 const LogicVRegister& src2,
4784 Condition cond);
4785 LogicVRegister fabscmp(VectorFormat vform,
4786 LogicVRegister dst,
4787 const LogicVRegister& src1,
4788 const LogicVRegister& src2,
4789 Condition cond);
4790 LogicVRegister fcmp_zero(VectorFormat vform,
4791 LogicVRegister dst,
4792 const LogicVRegister& src,
4793 Condition cond);
4794
4795 template <typename T>
4796 LogicVRegister fneg(VectorFormat vform,
4797 LogicVRegister dst,
4798 const LogicVRegister& src);
4799 LogicVRegister fneg(VectorFormat vform,
4800 LogicVRegister dst,
4801 const LogicVRegister& src);
4802 template <typename T>
4803 LogicVRegister frecpx(VectorFormat vform,
4804 LogicVRegister dst,
4805 const LogicVRegister& src);
4806 LogicVRegister frecpx(VectorFormat vform,
4807 LogicVRegister dst,
4808 const LogicVRegister& src);
4809 LogicVRegister ftsmul(VectorFormat vform,
4810 LogicVRegister dst,
4811 const LogicVRegister& src1,
4812 const LogicVRegister& src2);
4813 LogicVRegister ftssel(VectorFormat vform,
4814 LogicVRegister dst,
4815 const LogicVRegister& src1,
4816 const LogicVRegister& src2);
4817 LogicVRegister ftmad(VectorFormat vform,
4818 LogicVRegister dst,
4819 const LogicVRegister& src1,
4820 const LogicVRegister& src2,
4821 unsigned index);
4822 LogicVRegister fexpa(VectorFormat vform,
4823 LogicVRegister dst,
4824 const LogicVRegister& src);
4825 LogicVRegister flogb(VectorFormat vform,
4826 LogicVRegister dst,
4827 const LogicVRegister& src);
4828 template <typename T>
4829 LogicVRegister fscale(VectorFormat vform,
4830 LogicVRegister dst,
4831 const LogicVRegister& src1,
4832 const LogicVRegister& src2);
4833 LogicVRegister fscale(VectorFormat vform,
4834 LogicVRegister dst,
4835 const LogicVRegister& src1,
4836 const LogicVRegister& src2);
4837 template <typename T>
4838 LogicVRegister fabs_(VectorFormat vform,
4839 LogicVRegister dst,
4840 const LogicVRegister& src);
4841 LogicVRegister fabs_(VectorFormat vform,
4842 LogicVRegister dst,
4843 const LogicVRegister& src);
4844 LogicVRegister fabd(VectorFormat vform,
4845 LogicVRegister dst,
4846 const LogicVRegister& src1,
4847 const LogicVRegister& src2);
4848 LogicVRegister frint(VectorFormat vform,
4849 LogicVRegister dst,
4850 const LogicVRegister& src,
4851 FPRounding rounding_mode,
4852 bool inexact_exception = false,
4853 FrintMode frint_mode = kFrintToInteger);
4854 LogicVRegister fcvt(VectorFormat dst_vform,
4855 VectorFormat src_vform,
4856 LogicVRegister dst,
4857 const LogicPRegister& pg,
4858 const LogicVRegister& src);
4859 LogicVRegister fcvts(VectorFormat vform,
4860 unsigned dst_data_size_in_bits,
4861 unsigned src_data_size_in_bits,
4862 LogicVRegister dst,
4863 const LogicPRegister& pg,
4864 const LogicVRegister& src,
4865 FPRounding round,
4866 int fbits = 0);
4867 LogicVRegister fcvts(VectorFormat vform,
4868 LogicVRegister dst,
4869 const LogicVRegister& src,
4870 FPRounding rounding_mode,
4871 int fbits = 0);
4872 LogicVRegister fcvtu(VectorFormat vform,
4873 unsigned dst_data_size_in_bits,
4874 unsigned src_data_size_in_bits,
4875 LogicVRegister dst,
4876 const LogicPRegister& pg,
4877 const LogicVRegister& src,
4878 FPRounding round,
4879 int fbits = 0);
4880 LogicVRegister fcvtu(VectorFormat vform,
4881 LogicVRegister dst,
4882 const LogicVRegister& src,
4883 FPRounding rounding_mode,
4884 int fbits = 0);
4885 LogicVRegister fcvtl(VectorFormat vform,
4886 LogicVRegister dst,
4887 const LogicVRegister& src);
4888 LogicVRegister fcvtl2(VectorFormat vform,
4889 LogicVRegister dst,
4890 const LogicVRegister& src);
4891 LogicVRegister fcvtn(VectorFormat vform,
4892 LogicVRegister dst,
4893 const LogicVRegister& src);
4894 LogicVRegister fcvtn2(VectorFormat vform,
4895 LogicVRegister dst,
4896 const LogicVRegister& src);
4897 LogicVRegister fcvtxn(VectorFormat vform,
4898 LogicVRegister dst,
4899 const LogicVRegister& src);
4900 LogicVRegister fcvtxn2(VectorFormat vform,
4901 LogicVRegister dst,
4902 const LogicVRegister& src);
4903 LogicVRegister fsqrt(VectorFormat vform,
4904 LogicVRegister dst,
4905 const LogicVRegister& src);
4906 LogicVRegister frsqrte(VectorFormat vform,
4907 LogicVRegister dst,
4908 const LogicVRegister& src);
4909 LogicVRegister frecpe(VectorFormat vform,
4910 LogicVRegister dst,
4911 const LogicVRegister& src,
4912 FPRounding rounding);
4913 LogicVRegister ursqrte(VectorFormat vform,
4914 LogicVRegister dst,
4915 const LogicVRegister& src);
4916 LogicVRegister urecpe(VectorFormat vform,
4917 LogicVRegister dst,
4918 const LogicVRegister& src);
4919
4920 LogicPRegister pfalse(LogicPRegister dst);
4921 LogicPRegister pfirst(LogicPRegister dst,
4922 const LogicPRegister& pg,
4923 const LogicPRegister& src);
4924 LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern);
4925 LogicPRegister pnext(VectorFormat vform,
4926 LogicPRegister dst,
4927 const LogicPRegister& pg,
4928 const LogicPRegister& src);
4929
4930 LogicVRegister asrd(VectorFormat vform,
4931 LogicVRegister dst,
4932 const LogicVRegister& src1,
4933 int shift);
4934
4935 LogicVRegister andv(VectorFormat vform,
4936 LogicVRegister dst,
4937 const LogicPRegister& pg,
4938 const LogicVRegister& src);
4939 LogicVRegister eorv(VectorFormat vform,
4940 LogicVRegister dst,
4941 const LogicPRegister& pg,
4942 const LogicVRegister& src);
4943 LogicVRegister orv(VectorFormat vform,
4944 LogicVRegister dst,
4945 const LogicPRegister& pg,
4946 const LogicVRegister& src);
4947 LogicVRegister saddv(VectorFormat vform,
4948 LogicVRegister dst,
4949 const LogicPRegister& pg,
4950 const LogicVRegister& src);
4951 LogicVRegister sminv(VectorFormat vform,
4952 LogicVRegister dst,
4953 const LogicPRegister& pg,
4954 const LogicVRegister& src);
4955 LogicVRegister smaxv(VectorFormat vform,
4956 LogicVRegister dst,
4957 const LogicPRegister& pg,
4958 const LogicVRegister& src);
4959 LogicVRegister uaddv(VectorFormat vform,
4960 LogicVRegister dst,
4961 const LogicPRegister& pg,
4962 const LogicVRegister& src);
4963 LogicVRegister uminv(VectorFormat vform,
4964 LogicVRegister dst,
4965 const LogicPRegister& pg,
4966 const LogicVRegister& src);
4967 LogicVRegister umaxv(VectorFormat vform,
4968 LogicVRegister dst,
4969 const LogicPRegister& pg,
4970 const LogicVRegister& src);
4971
4972 LogicVRegister interleave_top_bottom(VectorFormat vform,
4973 LogicVRegister dst,
4974 const LogicVRegister& src);
4975
4976 template <typename T>
4977 struct TFPPairOp {
4978 typedef T (Simulator::*type)(T a, T b);
4979 };
4980
4981 template <typename T>
4982 LogicVRegister FPPairedAcrossHelper(VectorFormat vform,
4983 LogicVRegister dst,
4984 const LogicVRegister& src,
4985 typename TFPPairOp<T>::type fn,
4986 uint64_t inactive_value);
4987
4988 LogicVRegister FPPairedAcrossHelper(
4989 VectorFormat vform,
4990 LogicVRegister dst,
4991 const LogicVRegister& src,
4992 typename TFPPairOp<vixl::internal::SimFloat16>::type fn16,
4993 typename TFPPairOp<float>::type fn32,
4994 typename TFPPairOp<double>::type fn64,
4995 uint64_t inactive_value);
4996
4997 LogicVRegister fminv(VectorFormat vform,
4998 LogicVRegister dst,
4999 const LogicVRegister& src);
5000 LogicVRegister fmaxv(VectorFormat vform,
5001 LogicVRegister dst,
5002 const LogicVRegister& src);
5003 LogicVRegister fminnmv(VectorFormat vform,
5004 LogicVRegister dst,
5005 const LogicVRegister& src);
5006 LogicVRegister fmaxnmv(VectorFormat vform,
5007 LogicVRegister dst,
5008 const LogicVRegister& src);
5009 LogicVRegister faddv(VectorFormat vform,
5010 LogicVRegister dst,
5011 const LogicVRegister& src);
5012
  // Generator polynomials, in normal (non-reflected) bit order, for the CRC32
  // and CRC32C (Castagnoli) checksum instruction families.
  static const uint32_t CRC32_POLY = 0x04C11DB7;
  static const uint32_t CRC32C_POLY = 0x1EDC6F41;
  // Modulo-2 polynomial reduction of the n-bit `data` by `poly`.
  uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
  // Fold `val` into the running CRC accumulator `acc` using `poly`.
  template <typename T>
  uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
  uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
5019
5020 bool SysOp_W(int op, int64_t val);
5021
5022 template <typename T>
5023 T FPRecipSqrtEstimate(T op);
5024 template <typename T>
5025 T FPRecipEstimate(T op, FPRounding rounding);
5026 template <typename T, typename R>
5027 R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);
5028
5029 void FPCompare(double val0, double val1, FPTrapFlags trap);
5030 double FPRoundInt(double value, FPRounding round_mode);
5031 double FPRoundInt(double value, FPRounding round_mode, FrintMode frint_mode);
5032 double FPRoundIntCommon(double value, FPRounding round_mode);
5033 double recip_sqrt_estimate(double a);
5034 double recip_estimate(double a);
5035 double FPRecipSqrtEstimate(double a);
5036 double FPRecipEstimate(double a);
5037 double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
5038 double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
5039 float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
5040 float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
5041 ::vixl::internal::SimFloat16 FixedToFloat16(int64_t src,
5042 int fbits,
5043 FPRounding round_mode);
5044 ::vixl::internal::SimFloat16 UFixedToFloat16(uint64_t src,
5045 int fbits,
5046 FPRounding round_mode);
5047 int16_t FPToInt16(double value, FPRounding rmode);
5048 int32_t FPToInt32(double value, FPRounding rmode);
5049 int64_t FPToInt64(double value, FPRounding rmode);
5050 uint16_t FPToUInt16(double value, FPRounding rmode);
5051 uint32_t FPToUInt32(double value, FPRounding rmode);
5052 uint64_t FPToUInt64(double value, FPRounding rmode);
5053 int32_t FPToFixedJS(double value);
5054
5055 template <typename T>
5056 T FPAdd(T op1, T op2);
5057
5058 template <typename T>
5059 T FPNeg(T op);
5060
5061 template <typename T>
5062 T FPDiv(T op1, T op2);
5063
5064 template <typename T>
5065 T FPMax(T a, T b);
5066
5067 template <typename T>
5068 T FPMaxNM(T a, T b);
5069
5070 template <typename T>
5071 T FPMin(T a, T b);
5072
5073 template <typename T>
5074 T FPMinNM(T a, T b);
5075
5076 template <typename T>
5077 T FPMulNaNs(T op1, T op2);
5078
5079 template <typename T>
5080 T FPMul(T op1, T op2);
5081
5082 template <typename T>
5083 T FPMulx(T op1, T op2);
5084
5085 template <typename T>
5086 T FPMulAdd(T a, T op1, T op2);
5087
5088 template <typename T>
5089 T FPSqrt(T op);
5090
5091 template <typename T>
5092 T FPSub(T op1, T op2);
5093
5094 template <typename T>
5095 T FPRecipStepFused(T op1, T op2);
5096
5097 template <typename T>
5098 T FPRSqrtStepFused(T op1, T op2);
5099
  // This doesn't do anything at the moment. We'll need it if we want support
  // for cumulative exception bits or floating-point exceptions.
  // It is invoked where a floating-point exception would be raised (e.g. when
  // FPProcessNaN encounters a signalling NaN).
  void FPProcessException() {}
5103
5104 bool FPProcessNaNs(const Instruction* instr);
5105
5106 // Pseudo Printf instruction
5107 void DoPrintf(const Instruction* instr);
5108
5109 // Pseudo-instructions to configure CPU features dynamically.
5110 void DoConfigureCPUFeatures(const Instruction* instr);
5111
5112 void DoSaveCPUFeatures(const Instruction* instr);
5113 void DoRestoreCPUFeatures(const Instruction* instr);
5114
5115 // General arithmetic helpers ----------------------------
5116
5117 // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or
5118 // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must
5119 // be a valid int<n>_t.
5120 uint64_t IncDecN(uint64_t acc,
5121 int64_t delta,
5122 unsigned n,
5123 bool is_saturating = false,
5124 bool is_signed = false);
5125
5126 // SVE helpers -------------------------------------------
5127 LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op,
5128 VectorFormat vform,
5129 LogicVRegister zd,
5130 const LogicVRegister& zn,
5131 const LogicVRegister& zm);
5132
5133 LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
5134 LogicPRegister Pd,
5135 const LogicPRegister& pn,
5136 const LogicPRegister& pm);
5137
5138 LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,
5139 VectorFormat vform,
5140 LogicVRegister zd,
5141 uint64_t imm);
5142 enum UnpackType { kHiHalf, kLoHalf };
5143 enum ExtendType { kSignedExtend, kUnsignedExtend };
5144 LogicVRegister unpk(VectorFormat vform,
5145 LogicVRegister zd,
5146 const LogicVRegister& zn,
5147 UnpackType unpack_type,
5148 ExtendType extend_type);
5149
5150 LogicPRegister SVEIntCompareVectorsHelper(Condition cc,
5151 VectorFormat vform,
5152 LogicPRegister dst,
5153 const LogicPRegister& mask,
5154 const LogicVRegister& src1,
5155 const LogicVRegister& src2,
5156 bool is_wide_elements = false,
5157 FlagsUpdate flags = SetFlags);
5158
5159 void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
5160 VectorFormat vform,
5161 SVEOffsetModifier mod);
5162
5163 // Store each active zt<i>[lane] to `addr.GetElementAddress(lane, ...)`.
5164 //
5165 // `zt_code` specifies the code of the first register (zt). Each additional
5166 // register (up to `reg_count`) is `(zt_code + i) % 32`.
5167 //
5168 // This helper calls LogZWrite in the proper way, according to `addr`.
5169 void SVEStructuredStoreHelper(VectorFormat vform,
5170 const LogicPRegister& pg,
5171 unsigned zt_code,
5172 const LogicSVEAddressVector& addr);
5173 // Load each active zt<i>[lane] from `addr.GetElementAddress(lane, ...)`.
5174 // Returns false if a load failed.
5175 bool SVEStructuredLoadHelper(VectorFormat vform,
5176 const LogicPRegister& pg,
5177 unsigned zt_code,
5178 const LogicSVEAddressVector& addr,
5179 bool is_signed = false);
5180
5181 enum SVEFaultTolerantLoadType {
5182 // - Elements active in both FFR and pg are accessed as usual. If the access
5183 // fails, the corresponding lane and all subsequent lanes are filled with
5184 // an unpredictable value, and made inactive in FFR.
5185 //
5186 // - Elements active in FFR but not pg are set to zero.
5187 //
5188 // - Elements that are not active in FFR are filled with an unpredictable
5189 // value, regardless of pg.
5190 kSVENonFaultLoad,
5191
5192 // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the
5193 // first active element is always accessed, regardless of FFR, and will
5194 // generate a real fault if it is inaccessible. If the lane is not active in
5195 // FFR, the actual value loaded into the result is still unpredictable.
5196 kSVEFirstFaultLoad
5197 };
5198
5199 // Load with first-faulting or non-faulting load semantics, respecting and
5200 // updating FFR.
5201 void SVEFaultTolerantLoadHelper(VectorFormat vform,
5202 const LogicPRegister& pg,
5203 unsigned zt_code,
5204 const LogicSVEAddressVector& addr,
5205 SVEFaultTolerantLoadType type,
5206 bool is_signed);
5207
5208 LogicVRegister SVEBitwiseShiftHelper(Shift shift_op,
5209 VectorFormat vform,
5210 LogicVRegister dst,
5211 const LogicVRegister& src1,
5212 const LogicVRegister& src2,
5213 bool is_wide_elements);
5214
5215 // Pack all even- or odd-numbered elements of source vector side by side and
5216 // place in elements of lower half the destination vector, and leave the upper
5217 // half all zero.
5218 // [...| H | G | F | E | D | C | B | A ]
5219 // => [...................| G | E | C | A ]
5220 LogicVRegister pack_even_elements(VectorFormat vform,
5221 LogicVRegister dst,
5222 const LogicVRegister& src);
5223
5224 // [...| H | G | F | E | D | C | B | A ]
5225 // => [...................| H | F | D | B ]
5226 LogicVRegister pack_odd_elements(VectorFormat vform,
5227 LogicVRegister dst,
5228 const LogicVRegister& src);
5229
5230 LogicVRegister adcl(VectorFormat vform,
5231 LogicVRegister dst,
5232 const LogicVRegister& src1,
5233 const LogicVRegister& src2,
5234 bool top);
5235
5236 template <typename T>
5237 LogicVRegister FTMaddHelper(VectorFormat vform,
5238 LogicVRegister dst,
5239 const LogicVRegister& src1,
5240 const LogicVRegister& src2,
5241 uint64_t coeff_pos,
5242 uint64_t coeff_neg);
5243
5244 // Return the first or last active lane, or -1 if none are active.
5245 int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const;
5246 int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const;
5247
5248 int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const;
5249
5250 // Count active and true lanes in `pn`.
5251 int CountActiveAndTrueLanes(VectorFormat vform,
5252 const LogicPRegister& pg,
5253 const LogicPRegister& pn) const;
5254
5255 // Count the number of lanes referred to by `pattern`, given the vector
5256 // length. If `pattern` is not a recognised SVEPredicateConstraint, this
5257 // returns zero.
5258 int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const;
5259
5260 // Simulate a runtime call.
5261 void DoRuntimeCall(const Instruction* instr);
5262
5263 // Processor state ---------------------------------------
5264
5265 // Simulated monitors for exclusive access instructions.
5266 SimExclusiveLocalMonitor local_monitor_;
5267 SimExclusiveGlobalMonitor global_monitor_;
5268
5269 // Output stream.
5270 FILE* stream_;
5271 PrintDisassembler* print_disasm_;
5272
5273 // General purpose registers. Register 31 is the stack pointer.
5274 SimRegister registers_[kNumberOfRegisters];
5275
5276 // Vector registers
5277 SimVRegister vregisters_[kNumberOfVRegisters];
5278
5279 // SVE predicate registers.
5280 SimPRegister pregisters_[kNumberOfPRegisters];
5281
5282 // SVE first-fault register.
5283 SimFFRRegister ffr_register_;
5284
5285 // A pseudo SVE predicate register with all bits set to true.
5286 SimPRegister pregister_all_true_;
5287
5288 // Program Status Register.
5289 // bits[31, 27]: Condition flags N, Z, C, and V.
5290 // (Negative, Zero, Carry, Overflow)
5291 SimSystemRegister nzcv_;
5292
5293 // Floating-Point Control Register
5294 SimSystemRegister fpcr_;
5295
  // Only a subset of FPCR features are supported by the simulator. This helper
  // checks that the FPCR settings are supported.
  //
  // This is checked when floating-point instructions are executed, not when
  // FPCR is set. This allows generated code to modify FPCR for external
  // functions, or to save and restore it when entering and leaving generated
  // code.
  //
  // NOTE(review): these checks use VIXL_ASSERT, so unsupported FPCR settings
  // are presumably detected only in builds where assertions are enabled.
  void AssertSupportedFPCR() {
    // No flush-to-zero support.
    VIXL_ASSERT(ReadFpcr().GetFZ() == 0);
    // Ties-to-even rounding only.
    VIXL_ASSERT(ReadFpcr().GetRMode() == FPTieEven);
    // No alternative half-precision support.
    VIXL_ASSERT(ReadFpcr().GetAHP() == 0);
  }
5311
5312 static int CalcNFlag(uint64_t result, unsigned reg_size) {
5313 return (result >> (reg_size - 1)) & 1;
5314 }
5315
5316 static int CalcZFlag(uint64_t result) { return (result == 0) ? 1 : 0; }
5317
5318 static const uint32_t kConditionFlagsMask = 0xf0000000;
5319
5320 Memory memory_;
5321
5322 static const size_t kDefaultStackGuardStartSize = 0;
5323 static const size_t kDefaultStackGuardEndSize = 4 * 1024;
5324 static const size_t kDefaultStackUsableSize = 8 * 1024;
5325
5326 Decoder* decoder_;
5327 // Indicates if the pc has been modified by the instruction and should not be
5328 // automatically incremented.
5329 bool pc_modified_;
5330 const Instruction* pc_;
5331
5332 // Pointer to the last simulated instruction, used for checking the validity
5333 // of the current instruction with the previous instruction, such as movprfx.
5334 Instruction const* last_instr_;
5335
5336 // Branch type register, used for branch target identification.
5337 BType btype_;
5338
5339 // Next value of branch type register after the current instruction has been
5340 // decoded.
5341 BType next_btype_;
5342
5343 // Global flag for enabling guarded pages.
5344 // TODO: implement guarding at page granularity, rather than globally.
5345 bool guard_pages_;
5346
5347 static const char* xreg_names[];
5348 static const char* wreg_names[];
5349 static const char* breg_names[];
5350 static const char* hreg_names[];
5351 static const char* sreg_names[];
5352 static const char* dreg_names[];
5353 static const char* vreg_names[];
5354 static const char* zreg_names[];
5355 static const char* preg_names[];
5356
5357 private:
5358 using FormToVisitorFnMap =
5359 std::unordered_map<uint32_t,
5360 std::function<void(Simulator*, const Instruction*)>>;
5361 static const FormToVisitorFnMap* GetFormToVisitorFnMap();
5362
5363 uint32_t form_hash_;
5364
5365 static const PACKey kPACKeyIA;
5366 static const PACKey kPACKeyIB;
5367 static const PACKey kPACKeyDA;
5368 static const PACKey kPACKeyDB;
5369 static const PACKey kPACKeyGA;
5370
5371 bool CanReadMemory(uintptr_t address, size_t size);
5372
5373 #ifndef _WIN32
5374 // CanReadMemory needs placeholder file descriptors, so we use a pipe. We can
5375 // save some system call overhead by opening them on construction, rather than
5376 // on every call to CanReadMemory.
5377 int placeholder_pipe_fd_[2];
5378 #endif
5379
5380 template <typename T>
5381 static T FPDefaultNaN();
5382
  // Standard NaN processing.
  // `op` must be a NaN. A signalling NaN triggers FPProcessException(); the
  // result is then the default NaN when ReadDN() reports kUseDefaultNaN, and
  // the quiet form of `op` otherwise.
  template <typename T>
  T FPProcessNaN(T op) {
    VIXL_ASSERT(IsNaN(op));
    if (IsSignallingNaN(op)) {
      FPProcessException();
    }
    return (ReadDN() == kUseDefaultNaN) ? FPDefaultNaN<T>() : ToQuietNaN(op);
  }
5392
5393 template <typename T>
5394 T FPProcessNaNs(T op1, T op2) {
5395 if (IsSignallingNaN(op1)) {
5396 return FPProcessNaN(op1);
5397 } else if (IsSignallingNaN(op2)) {
5398 return FPProcessNaN(op2);
5399 } else if (IsNaN(op1)) {
5400 VIXL_ASSERT(IsQuietNaN(op1));
5401 return FPProcessNaN(op1);
5402 } else if (IsNaN(op2)) {
5403 VIXL_ASSERT(IsQuietNaN(op2));
5404 return FPProcessNaN(op2);
5405 } else {
5406 return 0.0;
5407 }
5408 }
5409
5410 template <typename T>
5411 T FPProcessNaNs3(T op1, T op2, T op3) {
5412 if (IsSignallingNaN(op1)) {
5413 return FPProcessNaN(op1);
5414 } else if (IsSignallingNaN(op2)) {
5415 return FPProcessNaN(op2);
5416 } else if (IsSignallingNaN(op3)) {
5417 return FPProcessNaN(op3);
5418 } else if (IsNaN(op1)) {
5419 VIXL_ASSERT(IsQuietNaN(op1));
5420 return FPProcessNaN(op1);
5421 } else if (IsNaN(op2)) {
5422 VIXL_ASSERT(IsQuietNaN(op2));
5423 return FPProcessNaN(op2);
5424 } else if (IsNaN(op3)) {
5425 VIXL_ASSERT(IsQuietNaN(op3));
5426 return FPProcessNaN(op3);
5427 } else {
5428 return 0.0;
5429 }
5430 }
5431
// Construct a SimVRegister from a SimPRegister, where each byte-sized lane of
// the destination is set to all true (0xff) when the corresponding
// predicate flag is set, and false (0x00) otherwise.
SimVRegister ExpandToSimVRegister(const SimPRegister& preg);

// Set each predicate flag in pd where the corresponding lane (sized
// according to vform) in vreg is non-zero. Clear the flag, otherwise. This
// is almost the opposite operation to ExpandToSimVRegister(), except that
// any non-zero lane is interpreted as true.
void ExtractFromSimVRegister(VectorFormat vform,
                             SimPRegister& pd,  // NOLINT(runtime/references)
                             SimVRegister vreg);

// True if trace output should use ANSI colour escape codes.
bool coloured_trace_;

// A set of TraceParameters flags, selecting what the simulator traces.
int trace_parameters_;

// Indicates whether the exclusive-access warning has been printed.
bool print_exclusive_access_warning_;
void PrintExclusiveAccessWarning();

// Records which CPU features the executed instructions require.
CPUFeaturesAuditor cpu_features_auditor_;
// Stack of feature sets saved/restored around feature-scoped regions.
std::vector<CPUFeatures> saved_cpu_features_;

// linear_congruential_engine, used to simulate randomness with repeatable
// behaviour (so that tests are deterministic). This is used to simulate RNDR
// and RNDRRS, as well as to simulate a source of entropy for architecturally
// undefined behaviour.
std::linear_congruential_engine<uint64_t,
                                0x5DEECE66D,
                                0xB,
                                static_cast<uint64_t>(1) << 48>
    rand_gen_;

// A configurable size of SVE vector registers.
unsigned vector_length_;

// DC ZVA enable (= 0) status and block size.
unsigned dczid_ = (0 << 4) | 4;  // 2^4 words => 64-byte block size.

// Representation of memory attributes such as MTE tagging and BTI page
// protection in addition to branch interceptions.
MetaDataDepot meta_data_;

// True if the debugger is enabled and might get entered.
bool debugger_enabled_;

// Debugger for the simulator.
std::unique_ptr<Debugger> debugger_;

// The Guarded Control Stack is represented using a vector, where the more
// recently stored addresses are at higher-numbered indices.
using GuardedControlStack = std::vector<uint64_t>;

// The GCSManager handles the synchronisation of GCS across multiple
// Simulator instances. Each Simulator has its own stack, but all share
// a GCSManager instance. This allows exchanging stacks between Simulators
// in a threaded application.
5491 class GCSManager {
5492 public:
5493 // Allocate a new Guarded Control Stack and add it to the vector of stacks.
5494 uint64_t AllocateStack() {
5495 const std::lock_guard<std::mutex> lock(stacks_mtx_);
5496
5497 GuardedControlStack* new_stack = new GuardedControlStack;
5498 uint64_t result;
5499
5500 // Put the new stack into the first available slot.
5501 for (result = 0; result < stacks_.size(); result++) {
5502 if (stacks_[result] == nullptr) {
5503 stacks_[result] = new_stack;
5504 break;
5505 }
5506 }
5507
5508 // If there were no slots, create a new one.
5509 if (result == stacks_.size()) {
5510 stacks_.push_back(new_stack);
5511 }
5512
5513 // Shift the index to look like a stack pointer aligned to a page.
5514 result <<= kPageSizeLog2;
5515
5516 // Push the tagged index onto the new stack as a seal.
5517 new_stack->push_back(result + 1);
5518 return result;
5519 }
5520
5521 // Free a Guarded Control Stack and set the stacks_ slot to null.
5522 void FreeStack(uint64_t gcs) {
5523 const std::lock_guard<std::mutex> lock(stacks_mtx_);
5524 uint64_t gcs_index = GetGCSIndex(gcs);
5525 GuardedControlStack* gcsptr = stacks_[gcs_index];
5526 if (gcsptr == nullptr) {
5527 VIXL_ABORT_WITH_MSG("Tried to free unallocated GCS ");
5528 } else {
5529 delete gcsptr;
5530 stacks_[gcs_index] = nullptr;
5531 }
5532 }
5533
5534 // Get a pointer to the GCS vector using a GCS id.
5535 GuardedControlStack* GetGCSPtr(uint64_t gcs) const {
5536 return stacks_[GetGCSIndex(gcs)];
5537 }
5538
5539 private:
5540 uint64_t GetGCSIndex(uint64_t gcs) const { return gcs >> 12; }
5541
5542 std::vector<GuardedControlStack*> stacks_;
5543 std::mutex stacks_mtx_;
5544 };
5545
// A GCS id indicating no GCS has been allocated.
static const uint64_t kGCSNoStack = kPageSize - 1;
// Id of this Simulator's currently active GCS (kGCSNoStack when none).
uint64_t gcs_;
// Whether GCS return-address checking is enabled.
bool gcs_enabled_;

public:
// Return the process-wide GCSManager shared by all Simulator instances
// (function-local static, constructed on first use).
GCSManager& GetGCSManager() {
  static GCSManager manager;
  return manager;
}

void EnableGCSCheck() { gcs_enabled_ = true; }
void DisableGCSCheck() { gcs_enabled_ = false; }
bool IsGCSCheckEnabled() const { return gcs_enabled_; }
5560
private:
// A GCS id equal to kGCSNoStack means no stack has been allocated.
bool IsAllocatedGCS(uint64_t gcs) const { return gcs != kGCSNoStack; }
// Free any existing GCS, then allocate and activate a fresh one with its
// seal entry removed so the stack starts empty.
void ResetGCSState() {
  GCSManager& m = GetGCSManager();
  if (IsAllocatedGCS(gcs_)) {
    m.FreeStack(gcs_);
  }
  ActivateGCS(m.AllocateStack());
  GCSPop();  // Remove seal.
}
5571
5572 GuardedControlStack* GetGCSPtr(uint64_t gcs) {
5573 GCSManager& m = GetGCSManager();
5574 GuardedControlStack* result = m.GetGCSPtr(gcs);
5575 return result;
5576 }
5577 GuardedControlStack* GetActiveGCSPtr() { return GetGCSPtr(gcs_); }
5578
5579 uint64_t ActivateGCS(uint64_t gcs) {
5580 uint64_t outgoing_gcs = gcs_;
5581 gcs_ = gcs;
5582 return outgoing_gcs;
5583 }
5584
5585 void GCSPush(uint64_t addr) {
5586 GetActiveGCSPtr()->push_back(addr);
5587 size_t entry = GetActiveGCSPtr()->size() - 1;
5588 LogGCS(/* is_push = */ true, addr, entry);
5589 }
5590
5591 uint64_t GCSPop() {
5592 GuardedControlStack* gcs = GetActiveGCSPtr();
5593 if (gcs->empty()) {
5594 return 0;
5595 }
5596 uint64_t return_addr = gcs->back();
5597 size_t entry = gcs->size() - 1;
5598 gcs->pop_back();
5599 LogGCS(/* is_push = */ false, return_addr, entry);
5600 return return_addr;
5601 }
5602
5603 uint64_t GCSPeek() {
5604 GuardedControlStack* gcs = GetActiveGCSPtr();
5605 if (gcs->empty()) {
5606 return 0;
5607 }
5608 uint64_t return_addr = gcs->back();
5609 return return_addr;
5610 }
5611
// Print `msg` followed by (up to) the eight most recent GCS records, then
// abort. Does nothing when GCS checking is disabled. Note that printing the
// records pops them from the stack; this is destructive, but harmless
// because we abort immediately afterwards.
void ReportGCSFailure(const char* msg) {
  if (IsGCSCheckEnabled()) {
    GuardedControlStack* gcs = GetActiveGCSPtr();
    printf("%s", msg);
    if (gcs == nullptr) {
      printf("GCS pointer is null\n");
    } else {
      printf("GCS records, most recent first:\n");
      int most_recent_index = static_cast<int>(gcs->size()) - 1;
      // Show at most the eight most recent entries.
      for (int i = 0; i < 8; i++) {
        if (!gcs->empty()) {
          uint64_t entry = gcs->back();
          gcs->pop_back();
          int index = most_recent_index - i;
          printf(" gcs%" PRIu64 "[%d]: 0x%016" PRIx64 "\n",
                 gcs_,
                 index,
                 entry);
        }
      }
      printf("End of GCS records.\n");
    }
    VIXL_ABORT_WITH_MSG("GCS failed ");
  }
}
5637 };
5638
#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L
// Base case of the recursive template used to emulate C++14
// `std::index_sequence`: an empty count terminates the recursion, yielding
// the accumulated index pack `I...`.
template <size_t... I>
struct Simulator::emulated_make_index_sequence_helper<0, I...>
    : Simulator::emulated_index_sequence<I...> {};
#endif
5646
5647 template <typename R, typename... P>
5648 void MetaDataDepot::BranchInterception<R, P...>::operator()(
5649 Simulator* simulator) const {
5650 if (callback_ == nullptr) {
5651 Simulator::RuntimeCallStructHelper<R, P...>::
5652 Wrapper(simulator, reinterpret_cast<uint64_t>(function_));
5653 } else {
5654 callback_(reinterpret_cast<uint64_t>(function_));
5655 }
5656 }
5657
5658 } // namespace aarch64
5659 } // namespace vixl
5660
5661 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
5662
5663 #endif // VIXL_AARCH64_SIMULATOR_AARCH64_H_
5664