// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../cpu-features.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"
#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity of this deletion is the
  // same as that of finding the element, i.e. O(n), where n is the number of
  // links in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
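
// A minimal usage sketch (an illustration, not part of the original header),
// assuming an Assembler instance `masm` built as declared below:
//
//   Label loop;
//   masm.bind(&loop);       // Bind the label to the current buffer offset.
//   masm.sub(x0, x0, 1);    // ... loop body ...
//   masm.cbnz(x0, &loop);   // Branch back while x0 is non-zero.
//
// Branches emitted before the label is bound are recorded in `links_` and
// patched when bind() resolves them.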


class Assembler;
class LiteralPool;

// A literal is a 32-bit or 64-bit piece of data stored in the instruction
// stream and loaded through a PC-relative load. The same literal can be
// referred to by multiple instructions, but a literal can only reside at one
// place in memory. A literal can be used by a load before or after being
// placed in memory.
//
// Internally, an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, and the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed, and the offset is the offset of the last load
//     using the literal (stored as -offset - 1). If multiple loads refer to
//     this literal, then the last load holds the offset of the preceding load
//     and all loads form a chain. Once the literal is placed, all the loads in
//     the chain are resolved and future loads fall back to possibility 1.
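//
// For example (a worked illustration of this encoding, not in the original
// header): a literal placed at buffer offset 8 stores offset_ = 8 + 1 = 9, so
// GetOffset() returns 9 - 1 = 8. A literal last loaded at offset 16 but not
// yet placed stores offset_ = -16 - 1 = -17, so GetLastUse() returns
// -(-17) - 1 = 16, and IsUsed() is true because offset_ < 0.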
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
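
// A minimal usage sketch (an illustration, not part of the original header),
// assuming an Assembler instance `masm`:
//
//   Literal<uint64_t> forty_two(42);   // 64-bit literal, manually deleted.
//   masm.ldr(x0, &forty_two);          // PC-relative load; links the literal.
//   masm.place(&forty_two);            // Emit the literal into the stream.
//
// The load may appear before or after place(); pending links are resolved
// when the literal is placed.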


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
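
// For example (a sketch, not part of the original header), an option is
// selected when constructing an Assembler, using the constructors declared
// below:
//
//   Assembler masm(buffer, capacity, PageOffsetDependentCode);
//   // `adrp` may now assume the code keeps its 4KB page offset when copied.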


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
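
// For example (a sketch, not part of the original header): the load and store
// declarations below take one of these options as a trailing argument, so
//
//   masm.ldr(x0, MemOperand(x1, 4), RequireUnscaledOffset);
//
// requests the unscaled-offset (`ldur`) encoding even though the offset 4
// could also be encoded in scaled form.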


// Assembler.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
  void Reset();

  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.

  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Branch to register, with pointer authentication, using key A and a
  // modifier of zero [Armv8.3].
  void braaz(const Register& xn);

  // Branch to register, with pointer authentication, using key B and a
  // modifier of zero [Armv8.3].
  void brabz(const Register& xn);

  // Branch with link to register, with pointer authentication, using key A
  // and a modifier of zero [Armv8.3].
  void blraaz(const Register& xn);

  // Branch with link to register, with pointer authentication, using key B
  // and a modifier of zero [Armv8.3].
  void blrabz(const Register& xn);

  // Return from subroutine, with pointer authentication, using key A [Armv8.3].
  void retaa();

  // Return from subroutine, with pointer authentication, using key B [Armv8.3].
  void retab();

  // Branch to register, with pointer authentication, using key A [Armv8.3].
  void braa(const Register& xn, const Register& xm);

  // Branch to register, with pointer authentication, using key B [Armv8.3].
  void brab(const Register& xn, const Register& xm);

  // Branch with link to register, with pointer authentication, using key A
  // [Armv8.3].
  void blraa(const Register& xn, const Register& xm);

  // Branch with link to register, with pointer authentication, using key B
  // [Armv8.3].
  void blrab(const Register& xn, const Register& xm);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches, the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);

  // Data Processing instructions.

  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Rotate register right and insert into NZCV flags under the control of a
  // mask [Armv8.4].
  void rmif(const Register& xn, unsigned rotation, StatusFlags flags);

  // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
  void setf8(const Register& rn);

  // Set NZCV flags from register, treated as a 16-bit value [Armv8.4].
  void setf16(const Register& rn);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.

  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise exclusive nor/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.

  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.

  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }
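
  // For example (a worked illustration, not in the original header):
  // `bfi(w0, w1, 8, 4)` inserts bits <3:0> of w1 at bit 8 of w0. It maps to
  // `bfm(w0, w1, (32 - 8) & 31, 4 - 1)`, i.e. immr = 24 and imms = 3.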

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }

  // Bitfield clear [Armv8.2].
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }

  // Sbfm aliases.

  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.

  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }
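
  // For example (a worked illustration, not in the original header):
  // `lsl(x0, x1, 3)` maps to `ubfm(x0, x1, (64 - 3) % 64, 64 - 3 - 1)`,
  // i.e. immr = 61 and imms = 60, which shifts x1 left by three bits.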

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.

  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes in 64-bit general purpose register, an alias for rev
  // [Armv8.2].
  void rev64(const Register& xd, const Register& xn) {
    VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
    rev(xd, xn);
  }

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
  void pacia(const Register& xd, const Register& rn);

  // Pointer Authentication Code for Instruction address, using key A and a
  // modifier of zero [Armv8.3].
  void paciza(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacia1716();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and modifier in SP [Armv8.3].
  void paciasp();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and a modifier of zero [Armv8.3].
  void paciaz();

  // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
  void pacib(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Instruction address, using key B and a
  // modifier of zero [Armv8.3].
  void pacizb(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacib1716();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and modifier in SP [Armv8.3].
  void pacibsp();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and a modifier of zero [Armv8.3].
  void pacibz();

  // Pointer Authentication Code for Data address, using key A [Armv8.3].
  void pacda(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key A and a modifier of
  // zero [Armv8.3].
  void pacdza(const Register& xd);

  // Pointer Authentication Code for Data address, using key B [Armv8.3].
  void pacdb(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key B and a modifier of
  // zero [Armv8.3].
  void pacdzb(const Register& xd);

  // Pointer Authentication Code, using Generic key [Armv8.3].
  void pacga(const Register& xd, const Register& xn, const Register& xm);

  // Authenticate Instruction address, using key A [Armv8.3].
  void autia(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key A and a modifier of zero
  // [Armv8.3].
  void autiza(const Register& xd);

  // Authenticate Instruction address, using key A, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autia1716();

  // Authenticate Instruction address, using key A, with address in LR and
  // modifier in SP [Armv8.3].
  void autiasp();

  // Authenticate Instruction address, using key A, with address in LR and a
  // modifier of zero [Armv8.3].
  void autiaz();

  // Authenticate Instruction address, using key B [Armv8.3].
  void autib(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key B and a modifier of zero
  // [Armv8.3].
  void autizb(const Register& xd);

  // Authenticate Instruction address, using key B, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autib1716();

  // Authenticate Instruction address, using key B, with address in LR and
  // modifier in SP [Armv8.3].
  void autibsp();

  // Authenticate Instruction address, using key B, with address in LR and a
  // modifier of zero [Armv8.3].
  void autibz();

  // Authenticate Data address, using key A [Armv8.3].
  void autda(const Register& xd, const Register& xn);

  // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
  void autdza(const Register& xd);

  // Authenticate Data address, using key B [Armv8.3].
  void autdb(const Register& xd, const Register& xn);

  // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
  void autdzb(const Register& xd);

  // Strip Pointer Authentication Code of Data address [Armv8.3].
  void xpacd(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address [Armv8.3].
  void xpaci(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
  void xpaclri();

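  // A minimal usage sketch of the PAC instructions above (an illustration,
  // not part of the original header):
  //
  //   masm.paciasp();   // Sign LR with key A, using SP as the modifier.
  //   // ... function body ...
  //   masm.autiasp();   // Authenticate LR with the same key and modifier.
  //   masm.ret();       // A failed authentication makes this return fault.
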
  // Memory instructions.

  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension (and unscaled offset).
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load double-word with pointer authentication, using data key A and a
  // modifier of zero [Armv8.3].
  void ldraa(const Register& xt, const MemOperand& src);

  // Load double-word with pointer authentication, using data key B and a
  // modifier of zero [Armv8.3].
  void ldrab(const Register& xt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Store LORelease byte [Armv8.1].
  void stllrb(const Register& rt, const MemOperand& dst);

  // Store LORelease half-word [Armv8.1].
  void stllrh(const Register& rt, const MemOperand& dst);

  // Store LORelease register [Armv8.1].
  void stllr(const Register& rt, const MemOperand& dst);

  // Load LOAcquire byte [Armv8.1].
  void ldlarb(const Register& rt, const MemOperand& src);

  // Load LOAcquire half-word [Armv8.1].
  void ldlarh(const Register& rt, const MemOperand& src);

  // Load LOAcquire register [Armv8.1].
  void ldlar(const Register& rt, const MemOperand& src);
1385 
1386   // Compare and Swap word or doubleword in memory [Armv8.1].
1387   void cas(const Register& rs, const Register& rt, const MemOperand& src);
1388 
1389   // Compare and Swap word or doubleword in memory [Armv8.1].
1390   void casa(const Register& rs, const Register& rt, const MemOperand& src);
1391 
1392   // Compare and Swap word or doubleword in memory [Armv8.1].
1393   void casl(const Register& rs, const Register& rt, const MemOperand& src);
1394 
1395   // Compare and Swap word or doubleword in memory [Armv8.1].
1396   void casal(const Register& rs, const Register& rt, const MemOperand& src);
1397 
1398   // Compare and Swap byte in memory [Armv8.1].
1399   void casb(const Register& rs, const Register& rt, const MemOperand& src);
1400 
1401   // Compare and Swap byte in memory [Armv8.1].
1402   void casab(const Register& rs, const Register& rt, const MemOperand& src);
1403 
1404   // Compare and Swap byte in memory [Armv8.1].
1405   void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1406 
1407   // Compare and Swap byte in memory [Armv8.1].
1408   void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1409 
1410   // Compare and Swap halfword in memory [Armv8.1].
1411   void cash(const Register& rs, const Register& rt, const MemOperand& src);
1412 
1413   // Compare and Swap halfword in memory [Armv8.1].
1414   void casah(const Register& rs, const Register& rt, const MemOperand& src);
1415 
1416   // Compare and Swap halfword in memory [Armv8.1].
1417   void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1418 
1419   // Compare and Swap halfword in memory [Armv8.1].
1420   void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1421 
1422   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1423   void casp(const Register& rs,
1424             const Register& rs2,
1425             const Register& rt,
1426             const Register& rt2,
1427             const MemOperand& src);
1428 
1429   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1430   void caspa(const Register& rs,
1431              const Register& rs2,
1432              const Register& rt,
1433              const Register& rt2,
1434              const MemOperand& src);
1435 
1436   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1437   void caspl(const Register& rs,
1438              const Register& rs2,
1439              const Register& rt,
1440              const Register& rt2,
1441              const MemOperand& src);
1442 
1443   // Compare and Swap Pair of words or doublewords in memory, with Load-acquire and Store-release semantics [Armv8.1].
1444   void caspal(const Register& rs,
1445               const Register& rs2,
1446               const Register& rt,
1447               const Register& rt2,
1448               const MemOperand& src);
1449 
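  // A minimal usage sketch (editorial comment): a compare-and-swap retry loop
  // that increments the word at the address in x0, assuming FEAT_LSE and an
  // assumed Assembler* `masm`. casal returns the value observed in memory
  // through its first operand.
  //
  //   Label retry;
  //   masm->bind(&retry);
  //   masm->ldr(w1, MemOperand(x0));        // w1 = expected value.
  //   masm->add(w2, w1, 1);                 // w2 = desired value.
  //   masm->mov(w3, w1);                    // Keep the expected value.
  //   masm->casal(w3, w2, MemOperand(x0));  // w3 = value actually observed.
  //   masm->cmp(w3, w1);
  //   masm->b(&retry, ne);                  // Another agent intervened.
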
1450   // Store-release byte (with unscaled offset) [Armv8.4].
1451   void stlurb(const Register& rt, const MemOperand& dst);
1452 
1453   // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1454   void ldapurb(const Register& rt, const MemOperand& src);
1455 
1456   // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1457   void ldapursb(const Register& rt, const MemOperand& src);
1458 
1459   // Store-release half-word (with unscaled offset) [Armv8.4].
1460   void stlurh(const Register& rt, const MemOperand& dst);
1461 
1462   // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1463   void ldapurh(const Register& rt, const MemOperand& src);
1464 
1465   // Load-acquire RCpc Register signed half-word (with unscaled offset)
1466   // [Armv8.4].
1467   void ldapursh(const Register& rt, const MemOperand& src);
1468 
1469   // Store-release word or double-word (with unscaled offset) [Armv8.4].
1470   void stlur(const Register& rt, const MemOperand& dst);
1471 
1472   // Load-acquire RCpc Register word or double-word (with unscaled offset)
1473   // [Armv8.4].
1474   void ldapur(const Register& rt, const MemOperand& src);
1475 
1476   // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1477   void ldapursw(const Register& xt, const MemOperand& src);
1478 
1479   // Atomic add on byte in memory [Armv8.1]
1480   void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1481 
1482   // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1483   void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1484 
1485   // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1486   void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1487 
1488   // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1489   // [Armv8.1]
1490   void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1491 
1492   // Atomic add on halfword in memory [Armv8.1]
1493   void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1494 
1495   // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1496   void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1497 
1498   // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1499   void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1500 
1501   // Atomic add on halfword in memory, with Load-acquire and Store-release
1502   // semantics [Armv8.1]
1503   void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1504 
1505   // Atomic add on word or doubleword in memory [Armv8.1]
1506   void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1507 
1508   // Atomic add on word or doubleword in memory, with Load-acquire semantics
1509   // [Armv8.1]
1510   void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1511 
1512   // Atomic add on word or doubleword in memory, with Store-release semantics
1513   // [Armv8.1]
1514   void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1515 
1516   // Atomic add on word or doubleword in memory, with Load-acquire and
1517   // Store-release semantics [Armv8.1]
1518   void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1519 
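  // A minimal usage sketch (editorial comment): a sequentially consistent
  // fetch-and-add of 1 on the 64-bit counter addressed by x0, with the
  // previous value returned in x2 (`masm` is an assumed Assembler*).
  //
  //   masm->movz(x1, 1);
  //   masm->ldaddal(x1, x2, MemOperand(x0));  // x2 = old value; memory += 1.
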
1520   // Atomic bit clear on byte in memory [Armv8.1]
1521   void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1522 
1523   // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1524   void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1525 
1526   // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1527   void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1528 
1529   // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1530   // semantics [Armv8.1]
1531   void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1532 
1533   // Atomic bit clear on halfword in memory [Armv8.1]
1534   void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1535 
1536   // Atomic bit clear on halfword in memory, with Load-acquire semantics
1537   // [Armv8.1]
1538   void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1539 
1540   // Atomic bit clear on halfword in memory, with Store-release semantics
1541   // [Armv8.1]
1542   void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1543 
1544   // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1545   // semantics [Armv8.1]
1546   void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1547 
1548   // Atomic bit clear on word or doubleword in memory [Armv8.1]
1549   void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1550 
1551   // Atomic bit clear on word or doubleword in memory, with Load-acquire
1552   // semantics [Armv8.1]
1553   void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1554 
1555   // Atomic bit clear on word or doubleword in memory, with Store-release
1556   // semantics [Armv8.1]
1557   void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1558 
1559   // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1560   // Store-release semantics [Armv8.1]
1561   void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1562 
1563   // Atomic exclusive OR on byte in memory [Armv8.1]
1564   void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1565 
1566   // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1567   // [Armv8.1]
1568   void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1569 
1570   // Atomic exclusive OR on byte in memory, with Store-release semantics
1571   // [Armv8.1]
1572   void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1573 
1574   // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1575   // semantics [Armv8.1]
1576   void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1577 
1578   // Atomic exclusive OR on halfword in memory [Armv8.1]
1579   void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1580 
1581   // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1582   // [Armv8.1]
1583   void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1584 
1585   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1586   // [Armv8.1]
1587   void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1588 
1589   // Atomic exclusive OR on halfword in memory, with Load-acquire and
1590   // Store-release semantics [Armv8.1]
1591   void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1592 
1593   // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1594   void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1595 
1596   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1597   // semantics [Armv8.1]
1598   void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1599 
1600   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1601   // semantics [Armv8.1]
1602   void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1603 
1604   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1605   // Store-release semantics [Armv8.1]
1606   void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1607 
1608   // Atomic bit set on byte in memory [Armv8.1]
1609   void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1610 
1611   // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1612   void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1613 
1614   // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1615   void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1616 
1617   // Atomic bit set on byte in memory, with Load-acquire and Store-release
1618   // semantics [Armv8.1]
1619   void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1620 
1621   // Atomic bit set on halfword in memory [Armv8.1]
1622   void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1623 
1624   // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1625   void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1626 
1627   // Atomic bit set on halfword in memory, with Store-release semantics
1628   // [Armv8.1]
1629   void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1630 
1631   // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1632   // semantics [Armv8.1]
1633   void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1634 
1635   // Atomic bit set on word or doubleword in memory [Armv8.1]
1636   void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1637 
1638   // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1639   // [Armv8.1]
1640   void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1641 
1642   // Atomic bit set on word or doubleword in memory, with Store-release
1643   // semantics [Armv8.1]
1644   void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1645 
1646   // Atomic bit set on word or doubleword in memory, with Load-acquire and
1647   // Store-release semantics [Armv8.1]
1648   void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1649 
1650   // Atomic signed maximum on byte in memory [Armv8.1]
1651   void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1652 
1653   // Atomic signed maximum on byte in memory, with Load-acquire semantics
1654   // [Armv8.1]
1655   void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1656 
1657   // Atomic signed maximum on byte in memory, with Store-release semantics
1658   // [Armv8.1]
1659   void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1660 
1661   // Atomic signed maximum on byte in memory, with Load-acquire and
1662   // Store-release semantics [Armv8.1]
1663   void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1664 
1665   // Atomic signed maximum on halfword in memory [Armv8.1]
1666   void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1667 
1668   // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1669   // [Armv8.1]
1670   void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1671 
1672   // Atomic signed maximum on halfword in memory, with Store-release semantics
1673   // [Armv8.1]
1674   void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1675 
1676   // Atomic signed maximum on halfword in memory, with Load-acquire and
1677   // Store-release semantics [Armv8.1]
1678   void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1679 
1680   // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1681   void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1682 
1683   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1684   // semantics [Armv8.1]
1685   void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1686 
1687   // Atomic signed maximum on word or doubleword in memory, with Store-release
1688   // semantics [Armv8.1]
1689   void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1690 
1691   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1692   // and Store-release semantics [Armv8.1]
1693   void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1694 
1695   // Atomic signed minimum on byte in memory [Armv8.1]
1696   void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1697 
1698   // Atomic signed minimum on byte in memory, with Load-acquire semantics
1699   // [Armv8.1]
1700   void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1701 
1702   // Atomic signed minimum on byte in memory, with Store-release semantics
1703   // [Armv8.1]
1704   void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1705 
1706   // Atomic signed minimum on byte in memory, with Load-acquire and
1707   // Store-release semantics [Armv8.1]
1708   void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1709 
1710   // Atomic signed minimum on halfword in memory [Armv8.1]
1711   void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1712 
1713   // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1714   // [Armv8.1]
1715   void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1716 
1717   // Atomic signed minimum on halfword in memory, with Store-release semantics
1718   // [Armv8.1]
1719   void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1720 
1721   // Atomic signed minimum on halfword in memory, with Load-acquire and
1722   // Store-release semantics [Armv8.1]
1723   void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1724 
1725   // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1726   void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1727 
1728   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1729   // semantics [Armv8.1]
1730   void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1731 
1732   // Atomic signed minimum on word or doubleword in memory, with Store-release
1733   // semantics [Armv8.1]
1734   void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1735 
1736   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1737   // and Store-release semantics [Armv8.1]
1738   void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1739 
1740   // Atomic unsigned maximum on byte in memory [Armv8.1]
1741   void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1742 
1743   // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1744   // [Armv8.1]
1745   void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1746 
1747   // Atomic unsigned maximum on byte in memory, with Store-release semantics
1748   // [Armv8.1]
1749   void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1750 
1751   // Atomic unsigned maximum on byte in memory, with Load-acquire and
1752   // Store-release semantics [Armv8.1]
1753   void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1754 
1755   // Atomic unsigned maximum on halfword in memory [Armv8.1]
1756   void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1757 
1758   // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1759   // [Armv8.1]
1760   void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1761 
1762   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1763   // [Armv8.1]
1764   void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1765 
1766   // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1767   // Store-release semantics [Armv8.1]
1768   void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1769 
1770   // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1771   void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1772 
1773   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1774   // semantics [Armv8.1]
1775   void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1776 
1777   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1778   // semantics [Armv8.1]
1779   void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1780 
1781   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1782   // and Store-release semantics [Armv8.1]
1783   void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1784 
1785   // Atomic unsigned minimum on byte in memory [Armv8.1]
1786   void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1787 
1788   // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1789   // [Armv8.1]
1790   void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1791 
1792   // Atomic unsigned minimum on byte in memory, with Store-release semantics
1793   // [Armv8.1]
1794   void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1795 
1796   // Atomic unsigned minimum on byte in memory, with Load-acquire and
1797   // Store-release semantics [Armv8.1]
1798   void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1799 
1800   // Atomic unsigned minimum on halfword in memory [Armv8.1]
1801   void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1802 
1803   // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1804   // [Armv8.1]
1805   void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1806 
1807   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1808   // [Armv8.1]
1809   void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1810 
1811   // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1812   // Store-release semantics [Armv8.1]
1813   void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1814 
1815   // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1816   void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1817 
1818   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1819   // semantics [Armv8.1]
1820   void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1821 
1822   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1823   // semantics [Armv8.1]
1824   void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1825 
1826   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1827   // and Store-release semantics [Armv8.1]
1828   void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1829 
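  // Editorial note: the ST<op> forms that follow behave like the corresponding
  // LD<op> forms with the loaded result discarded; for example,
  // stadd(x1, MemOperand(x0)) has the same memory effect as
  // ldadd(x1, xzr, MemOperand(x0)).
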
1830   // Atomic add on byte in memory, without return. [Armv8.1]
1831   void staddb(const Register& rs, const MemOperand& src);
1832 
1833   // Atomic add on byte in memory, with Store-release semantics and without
1834   // return. [Armv8.1]
1835   void staddlb(const Register& rs, const MemOperand& src);
1836 
1837   // Atomic add on halfword in memory, without return. [Armv8.1]
1838   void staddh(const Register& rs, const MemOperand& src);
1839 
1840   // Atomic add on halfword in memory, with Store-release semantics and without
1841   // return. [Armv8.1]
1842   void staddlh(const Register& rs, const MemOperand& src);
1843 
1844   // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1845   void stadd(const Register& rs, const MemOperand& src);
1846 
1847   // Atomic add on word or doubleword in memory, with Store-release semantics
1848   // and without return. [Armv8.1]
1849   void staddl(const Register& rs, const MemOperand& src);
1850 
1851   // Atomic bit clear on byte in memory, without return. [Armv8.1]
1852   void stclrb(const Register& rs, const MemOperand& src);
1853 
1854   // Atomic bit clear on byte in memory, with Store-release semantics and
1855   // without return. [Armv8.1]
1856   void stclrlb(const Register& rs, const MemOperand& src);
1857 
1858   // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1859   void stclrh(const Register& rs, const MemOperand& src);
1860 
1861   // Atomic bit clear on halfword in memory, with Store-release semantics and
1862   // without return. [Armv8.1]
1863   void stclrlh(const Register& rs, const MemOperand& src);
1864 
1865   // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1866   void stclr(const Register& rs, const MemOperand& src);
1867 
1868   // Atomic bit clear on word or doubleword in memory, with Store-release
1869   // semantics and without return. [Armv8.1]
1870   void stclrl(const Register& rs, const MemOperand& src);
1871 
1872   // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1873   void steorb(const Register& rs, const MemOperand& src);
1874 
1875   // Atomic exclusive OR on byte in memory, with Store-release semantics and
1876   // without return. [Armv8.1]
1877   void steorlb(const Register& rs, const MemOperand& src);
1878 
1879   // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1880   void steorh(const Register& rs, const MemOperand& src);
1881 
1882   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1883   // and without return. [Armv8.1]
1884   void steorlh(const Register& rs, const MemOperand& src);
1885 
1886   // Atomic exclusive OR on word or doubleword in memory, without return.
1887   // [Armv8.1]
1888   void steor(const Register& rs, const MemOperand& src);
1889 
1890   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1891   // semantics and without return. [Armv8.1]
1892   void steorl(const Register& rs, const MemOperand& src);
1893 
1894   // Atomic bit set on byte in memory, without return. [Armv8.1]
1895   void stsetb(const Register& rs, const MemOperand& src);
1896 
1897   // Atomic bit set on byte in memory, with Store-release semantics and without
1898   // return. [Armv8.1]
1899   void stsetlb(const Register& rs, const MemOperand& src);
1900 
1901   // Atomic bit set on halfword in memory, without return. [Armv8.1]
1902   void stseth(const Register& rs, const MemOperand& src);
1903 
1904   // Atomic bit set on halfword in memory, with Store-release semantics and
1905   // without return. [Armv8.1]
1906   void stsetlh(const Register& rs, const MemOperand& src);
1907 
1908   // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1909   void stset(const Register& rs, const MemOperand& src);
1910 
1911   // Atomic bit set on word or doubleword in memory, with Store-release
1912   // semantics and without return. [Armv8.1]
1913   void stsetl(const Register& rs, const MemOperand& src);
1914 
1915   // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1916   void stsmaxb(const Register& rs, const MemOperand& src);
1917 
1918   // Atomic signed maximum on byte in memory, with Store-release semantics and
1919   // without return. [Armv8.1]
1920   void stsmaxlb(const Register& rs, const MemOperand& src);
1921 
1922   // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1923   void stsmaxh(const Register& rs, const MemOperand& src);
1924 
1925   // Atomic signed maximum on halfword in memory, with Store-release semantics
1926   // and without return. [Armv8.1]
1927   void stsmaxlh(const Register& rs, const MemOperand& src);
1928 
1929   // Atomic signed maximum on word or doubleword in memory, without return.
1930   // [Armv8.1]
1931   void stsmax(const Register& rs, const MemOperand& src);
1932 
1933   // Atomic signed maximum on word or doubleword in memory, with Store-release
1934   // semantics and without return. [Armv8.1]
1935   void stsmaxl(const Register& rs, const MemOperand& src);
1936 
1937   // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1938   void stsminb(const Register& rs, const MemOperand& src);
1939 
1940   // Atomic signed minimum on byte in memory, with Store-release semantics and
1941   // without return. [Armv8.1]
1942   void stsminlb(const Register& rs, const MemOperand& src);
1943 
1944   // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1945   void stsminh(const Register& rs, const MemOperand& src);
1946 
1947   // Atomic signed minimum on halfword in memory, with Store-release semantics
1948   // and without return. [Armv8.1]
1949   void stsminlh(const Register& rs, const MemOperand& src);
1950 
1951   // Atomic signed minimum on word or doubleword in memory, without return.
1952   // [Armv8.1]
1953   void stsmin(const Register& rs, const MemOperand& src);
1954 
1955   // Atomic signed minimum on word or doubleword in memory, with Store-release
1956   // semantics and without return. [Armv8.1]
1957   void stsminl(const Register& rs, const MemOperand& src);
1958 
1959   // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1960   void stumaxb(const Register& rs, const MemOperand& src);
1961 
1962   // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1963   // without return. [Armv8.1]
1964   void stumaxlb(const Register& rs, const MemOperand& src);
1965 
1966   // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1967   void stumaxh(const Register& rs, const MemOperand& src);
1968 
1969   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1970   // and without return. [Armv8.1]
1971   void stumaxlh(const Register& rs, const MemOperand& src);
1972 
1973   // Atomic unsigned maximum on word or doubleword in memory, without return.
1974   // [Armv8.1]
1975   void stumax(const Register& rs, const MemOperand& src);
1976 
1977   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1978   // semantics and without return. [Armv8.1]
1979   void stumaxl(const Register& rs, const MemOperand& src);
1980 
1981   // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
1982   void stuminb(const Register& rs, const MemOperand& src);
1983 
1984   // Atomic unsigned minimum on byte in memory, with Store-release semantics and
1985   // without return. [Armv8.1]
1986   void stuminlb(const Register& rs, const MemOperand& src);
1987 
1988   // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
1989   void stuminh(const Register& rs, const MemOperand& src);
1990 
1991   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1992   // and without return. [Armv8.1]
1993   void stuminlh(const Register& rs, const MemOperand& src);
1994 
1995   // Atomic unsigned minimum on word or doubleword in memory, without return.
1996   // [Armv8.1]
1997   void stumin(const Register& rs, const MemOperand& src);
1998 
1999   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2000   // semantics and without return. [Armv8.1]
2001   void stuminl(const Register& rs, const MemOperand& src);
2002 
2003   // Swap byte in memory [Armv8.1]
2004   void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2005 
2006   // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2007   void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2008 
2009   // Swap byte in memory, with Store-release semantics [Armv8.1]
2010   void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2011 
2012   // Swap byte in memory, with Load-acquire and Store-release semantics
2013   // [Armv8.1]
2014   void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2015 
2016   // Swap halfword in memory [Armv8.1]
2017   void swph(const Register& rs, const Register& rt, const MemOperand& src);
2018 
2019   // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2020   void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2021 
2022   // Swap halfword in memory, with Store-release semantics [Armv8.1]
2023   void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2024 
2025   // Swap halfword in memory, with Load-acquire and Store-release semantics
2026   // [Armv8.1]
2027   void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2028 
2029   // Swap word or doubleword in memory [Armv8.1]
2030   void swp(const Register& rs, const Register& rt, const MemOperand& src);
2031 
2032   // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2033   void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2034 
2035   // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2036   void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2037 
2038   // Swap word or doubleword in memory, with Load-acquire and Store-release
2039   // semantics [Armv8.1]
2040   void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2041 
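  // A minimal usage sketch (editorial comment): an atomic exchange with full
  // ordering, storing zero and retrieving the previous word at the address in
  // x0 (`masm` is an assumed Assembler*).
  //
  //   masm->swpal(wzr, w1, MemOperand(x0));  // w1 = old value; memory = 0.
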
2042   // Load-Acquire RCpc Register byte [Armv8.3]
2043   void ldaprb(const Register& rt, const MemOperand& src);
2044 
2045   // Load-Acquire RCpc Register halfword [Armv8.3]
2046   void ldaprh(const Register& rt, const MemOperand& src);
2047 
2048   // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2049   void ldapr(const Register& rt, const MemOperand& src);
2050 
2051   // Prefetch memory.
2052   void prfm(PrefetchOperation op,
2053             const MemOperand& addr,
2054             LoadStoreScalingOption option = PreferScaledOffset);
2055 
2056   // Prefetch memory (with unscaled offset).
2057   void prfum(PrefetchOperation op,
2058              const MemOperand& addr,
2059              LoadStoreScalingOption option = PreferUnscaledOffset);
2060 
2061   // Prefetch memory in the literal pool.
2062   void prfm(PrefetchOperation op, RawLiteral* literal);
2063 
2064   // Prefetch from pc + imm19 << 2.
2065   void prfm(PrefetchOperation op, int64_t imm19);
2066 
2067   // Prefetch memory (allowing unallocated hints).
2068   void prfm(int op,
2069             const MemOperand& addr,
2070             LoadStoreScalingOption option = PreferScaledOffset);
2071 
2072   // Prefetch memory (with unscaled offset, allowing unallocated hints).
2073   void prfum(int op,
2074              const MemOperand& addr,
2075              LoadStoreScalingOption option = PreferUnscaledOffset);
2076 
2077   // Prefetch memory in the literal pool (allowing unallocated hints).
2078   void prfm(int op, RawLiteral* literal);
2079 
2080   // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2081   void prfm(int op, int64_t imm19);
2082 
2083   // Move instructions. The default shift of -1 indicates that the move
2084   // instruction will calculate an appropriate 16-bit immediate and left-shift
2085   // combination equal to the 64-bit immediate argument. If an explicit left
2086   // shift is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2087   //
2088   // For movk, an explicit shift can be used to indicate which half-word should
2089   // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
2090   // half-word with zero, whereas movk(x0, 0, 48) will overwrite the
2091   // most-significant half-word.
2092 
2093   // Move immediate and keep.
2094   void movk(const Register& rd, uint64_t imm, int shift = -1) {
2095     MoveWide(rd, imm, shift, MOVK);
2096   }
2097 
2098   // Move inverted immediate.
2099   void movn(const Register& rd, uint64_t imm, int shift = -1) {
2100     MoveWide(rd, imm, shift, MOVN);
2101   }
2102 
2103   // Move immediate.
2104   void movz(const Register& rd, uint64_t imm, int shift = -1) {
2105     MoveWide(rd, imm, shift, MOVZ);
2106   }
2107 
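  // A minimal usage sketch (editorial comment): materialising the 64-bit
  // constant 0x0000123400005678 in x0 with a movz/movk pair (`masm` is an
  // assumed Assembler*).
  //
  //   masm->movz(x0, 0x5678);      // x0 = 0x0000000000005678.
  //   masm->movk(x0, 0x1234, 32);  // x0 = 0x0000123400005678.
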
2108   // Misc instructions.
2109 
2110   // Monitor debug-mode breakpoint.
2111   void brk(int code);
2112 
2113   // Halting debug-mode breakpoint.
2114   void hlt(int code);
2115 
2116   // Generate exception targeting EL1.
2117   void svc(int code);
2118 
2119   // Generate undefined instruction exception.
2120   void udf(int code);
2121 
2122   // Move register to register.
2123   void mov(const Register& rd, const Register& rn);
2124 
2125   // Move inverted operand to register.
2126   void mvn(const Register& rd, const Operand& operand);
2127 
2128   // System instructions.
2129 
2130   // Move to register from system register.
2131   void mrs(const Register& xt, SystemRegister sysreg);
2132 
2133   // Move from register to system register.
2134   void msr(SystemRegister sysreg, const Register& xt);
2135 
2136   // Invert carry flag [Armv8.4].
2137   void cfinv();
2138 
2139   // Convert floating-point condition flags from alternative format to Arm
2140   // format [Armv8.5].
2141   void xaflag();
2142 
2143   // Convert floating-point condition flags from Arm format to alternative
2144   // format [Armv8.5].
2145   void axflag();
2146 
2147   // System instruction.
2148   void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2149 
2150   // System instruction with pre-encoded op (op1:crn:crm:op2).
2151   void sys(int op, const Register& xt = xzr);
2152 
2153   // System data cache operation.
2154   void dc(DataCacheOp op, const Register& rt);
2155 
2156   // System instruction cache operation.
2157   void ic(InstructionCacheOp op, const Register& rt);
2158 
2159   // System hint (named type).
2160   void hint(SystemHint code);
2161 
2162   // System hint (numbered type).
2163   void hint(int imm7);
2164 
2165   // Clear exclusive monitor.
2166   void clrex(int imm4 = 0xf);
2167 
2168   // Data memory barrier.
2169   void dmb(BarrierDomain domain, BarrierType type);
2170 
2171   // Data synchronization barrier.
2172   void dsb(BarrierDomain domain, BarrierType type);
2173 
2174   // Instruction synchronization barrier.
2175   void isb();
2176 
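  // A minimal usage sketch (editorial comment): the conventional barrier
  // sequence after writing instructions that will later be executed, once the
  // relevant cache maintenance has been performed (`masm` is an assumed
  // Assembler*).
  //
  //   masm->dsb(InnerShareable, BarrierAll);  // Complete prior maintenance.
  //   masm->isb();                            // Refetch the new instructions.
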
2177   // Error synchronization barrier.
2178   void esb();
2179 
2180   // Conditional speculation dependency barrier.
2181   void csdb();
2182 
2183   // No-op.
2184   void nop() { hint(NOP); }
2185 
2186   // Branch target identification.
2187   void bti(BranchTargetIdentifier id);
2188 
2189   // FP and NEON instructions.
2190 
2191   // Move double precision immediate to FP register.
2192   void fmov(const VRegister& vd, double imm);
2193 
2194   // Move single precision immediate to FP register.
2195   void fmov(const VRegister& vd, float imm);
2196 
2197   // Move half precision immediate to FP register [Armv8.2].
2198   void fmov(const VRegister& vd, Float16 imm);
2199 
2200   // Move FP register to register.
2201   void fmov(const Register& rd, const VRegister& fn);
2202 
2203   // Move register to FP register.
2204   void fmov(const VRegister& vd, const Register& rn);
2205 
2206   // Move FP register to FP register.
2207   void fmov(const VRegister& vd, const VRegister& fn);
2208 
2209   // Move 64-bit register to top half of 128-bit FP register.
2210   void fmov(const VRegister& vd, int index, const Register& rn);
2211 
2212   // Move top half of 128-bit FP register to 64-bit register.
2213   void fmov(const Register& rd, const VRegister& vn, int index);
2214 
2215   // FP add.
2216   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2217 
2218   // FP subtract.
2219   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2220 
2221   // FP multiply.
2222   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2223 
2224   // FP fused multiply-add.
2225   void fmadd(const VRegister& vd,
2226              const VRegister& vn,
2227              const VRegister& vm,
2228              const VRegister& va);
2229 
2230   // FP fused multiply-subtract.
2231   void fmsub(const VRegister& vd,
2232              const VRegister& vn,
2233              const VRegister& vm,
2234              const VRegister& va);
2235 
2236   // FP fused multiply-add and negate.
2237   void fnmadd(const VRegister& vd,
2238               const VRegister& vn,
2239               const VRegister& vm,
2240               const VRegister& va);
2241 
2242   // FP fused multiply-subtract and negate.
2243   void fnmsub(const VRegister& vd,
2244               const VRegister& vn,
2245               const VRegister& vm,
2246               const VRegister& va);
2247 
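  // Editorial note: the accumulator is the last operand, so
  // fmadd(d0, d1, d2, d3) computes d0 = d3 + (d1 * d2) with a single rounding,
  // and fnmadd(d0, d1, d2, d3) computes d0 = -d3 - (d1 * d2).
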
2248   // FP multiply-negate scalar.
2249   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2250 
2251   // FP reciprocal exponent scalar.
2252   void frecpx(const VRegister& vd, const VRegister& vn);
2253 
2254   // FP divide.
2255   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2256 
2257   // FP maximum.
2258   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2259 
2260   // FP minimum.
2261   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2262 
2263   // FP maximum number.
2264   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2265 
2266   // FP minimum number.
2267   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2268 
2269   // FP absolute.
2270   void fabs(const VRegister& vd, const VRegister& vn);
2271 
2272   // FP negate.
2273   void fneg(const VRegister& vd, const VRegister& vn);
2274 
2275   // FP square root.
2276   void fsqrt(const VRegister& vd, const VRegister& vn);
2277 
2278   // FP round to integer, nearest with ties to away.
2279   void frinta(const VRegister& vd, const VRegister& vn);
2280 
2281   // FP round to integer, implicit rounding.
2282   void frinti(const VRegister& vd, const VRegister& vn);
2283 
2284   // FP round to integer, toward minus infinity.
2285   void frintm(const VRegister& vd, const VRegister& vn);
2286 
2287   // FP round to integer, nearest with ties to even.
2288   void frintn(const VRegister& vd, const VRegister& vn);
2289 
2290   // FP round to integer, toward plus infinity.
2291   void frintp(const VRegister& vd, const VRegister& vn);
2292 
2293   // FP round to integer, exact, implicit rounding.
2294   void frintx(const VRegister& vd, const VRegister& vn);
2295 
2296   // FP round to integer, towards zero.
2297   void frintz(const VRegister& vd, const VRegister& vn);
2298 
2299   // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2300   void frint32x(const VRegister& vd, const VRegister& vn);
2301 
2302   // FP round to 32-bit integer, towards zero [Armv8.5].
2303   void frint32z(const VRegister& vd, const VRegister& vn);
2304 
2305   // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2306   void frint64x(const VRegister& vd, const VRegister& vn);
2307 
2308   // FP round to 64-bit integer, towards zero [Armv8.5].
2309   void frint64z(const VRegister& vd, const VRegister& vn);
2310 
2311   void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2312 
2313   void FPCompareMacro(const VRegister& vn,
2314                       const VRegister& vm,
2315                       FPTrapFlags trap);
2316 
2317   // FP compare registers.
2318   void fcmp(const VRegister& vn, const VRegister& vm);
2319 
2320   // FP compare immediate.
2321   void fcmp(const VRegister& vn, double value);
2322 
2323   void FPCCompareMacro(const VRegister& vn,
2324                        const VRegister& vm,
2325                        StatusFlags nzcv,
2326                        Condition cond,
2327                        FPTrapFlags trap);
2328 
2329   // FP conditional compare.
2330   void fccmp(const VRegister& vn,
2331              const VRegister& vm,
2332              StatusFlags nzcv,
2333              Condition cond);
2334 
2335   // FP signaling compare registers.
2336   void fcmpe(const VRegister& vn, const VRegister& vm);
2337 
2338   // FP signaling compare immediate.
2339   void fcmpe(const VRegister& vn, double value);
2340 
2341   // FP conditional signaling compare.
2342   void fccmpe(const VRegister& vn,
2343               const VRegister& vm,
2344               StatusFlags nzcv,
2345               Condition cond);
2346 
2347   // FP conditional select.
2348   void fcsel(const VRegister& vd,
2349              const VRegister& vn,
2350              const VRegister& vm,
2351              Condition cond);
2352 
2353   // Common FP Convert functions.
2354   void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2355   void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2356   void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2357 
2358   // FP convert between precisions.
2359   void fcvt(const VRegister& vd, const VRegister& vn);
2360 
2361   // FP convert to higher precision.
2362   void fcvtl(const VRegister& vd, const VRegister& vn);
2363 
2364   // FP convert to higher precision (second part).
2365   void fcvtl2(const VRegister& vd, const VRegister& vn);
2366 
2367   // FP convert to lower precision.
2368   void fcvtn(const VRegister& vd, const VRegister& vn);
2369 
2370   // FP convert to lower precision (second part).
2371   void fcvtn2(const VRegister& vd, const VRegister& vn);
2372 
2373   // FP convert to lower precision, rounding to odd.
2374   void fcvtxn(const VRegister& vd, const VRegister& vn);
2375 
2376   // FP convert to lower precision, rounding to odd (second part).
2377   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2378 
2379   // FP convert to signed integer, nearest with ties to away.
2380   void fcvtas(const Register& rd, const VRegister& vn);
2381 
2382   // FP convert to unsigned integer, nearest with ties to away.
2383   void fcvtau(const Register& rd, const VRegister& vn);
2384 
2385   // FP convert to signed integer, nearest with ties to away.
2386   void fcvtas(const VRegister& vd, const VRegister& vn);
2387 
2388   // FP convert to unsigned integer, nearest with ties to away.
2389   void fcvtau(const VRegister& vd, const VRegister& vn);
2390 
2391   // FP convert to signed integer, round towards -infinity.
2392   void fcvtms(const Register& rd, const VRegister& vn);
2393 
2394   // FP convert to unsigned integer, round towards -infinity.
2395   void fcvtmu(const Register& rd, const VRegister& vn);
2396 
2397   // FP convert to signed integer, round towards -infinity.
2398   void fcvtms(const VRegister& vd, const VRegister& vn);
2399 
2400   // FP convert to unsigned integer, round towards -infinity.
2401   void fcvtmu(const VRegister& vd, const VRegister& vn);
2402 
2403   // FP convert to signed integer, nearest with ties to even.
2404   void fcvtns(const Register& rd, const VRegister& vn);
2405 
2406   // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2407   void fjcvtzs(const Register& rd, const VRegister& vn);
2408 
2409   // FP convert to unsigned integer, nearest with ties to even.
2410   void fcvtnu(const Register& rd, const VRegister& vn);
2411 
2412   // FP convert to signed integer, nearest with ties to even.
2413   void fcvtns(const VRegister& rd, const VRegister& vn);
2414 
2415   // FP convert to unsigned integer, nearest with ties to even.
2416   void fcvtnu(const VRegister& rd, const VRegister& vn);
2417 
2418   // FP convert to signed integer or fixed-point, round towards zero.
2419   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2420 
2421   // FP convert to unsigned integer or fixed-point, round towards zero.
2422   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2423 
2424   // FP convert to signed integer or fixed-point, round towards zero.
2425   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2426 
2427   // FP convert to unsigned integer or fixed-point, round towards zero.
2428   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2429 
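  // Editorial note: a non-zero fbits requests a fixed-point result; for
  // example, fcvtzs(w0, s1, 8) converts s1 to a signed 24.8 fixed-point
  // value, i.e. w0 = RoundTowardsZero(s1 * 256).
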
2430   // FP convert to signed integer, round towards +infinity.
2431   void fcvtps(const Register& rd, const VRegister& vn);
2432 
2433   // FP convert to unsigned integer, round towards +infinity.
2434   void fcvtpu(const Register& rd, const VRegister& vn);
2435 
2436   // FP convert to signed integer, round towards +infinity.
2437   void fcvtps(const VRegister& vd, const VRegister& vn);
2438 
2439   // FP convert to unsigned integer, round towards +infinity.
2440   void fcvtpu(const VRegister& vd, const VRegister& vn);
2441 
2442   // Convert signed integer or fixed point to FP.
2443   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2444 
2445   // Convert unsigned integer or fixed point to FP.
2446   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2447 
2448   // Convert signed integer or fixed-point to FP.
2449   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2450 
2451   // Convert unsigned integer or fixed-point to FP.
2452   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2453 
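  // Editorial note: conversely, a non-zero fbits here treats the integer
  // input as fixed-point; for example, scvtf(d0, x1, 16) computes
  // d0 = x1 / 65536.0.
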
2454   // Unsigned absolute difference.
2455   void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2456 
2457   // Signed absolute difference.
2458   void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2459 
2460   // Unsigned absolute difference and accumulate.
2461   void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2462 
2463   // Signed absolute difference and accumulate.
2464   void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2465 
2466   // Add.
2467   void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2468 
2469   // Subtract.
2470   void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2471 
2472   // Unsigned halving add.
2473   void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2474 
2475   // Signed halving add.
2476   void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2477 
2478   // Unsigned rounding halving add.
2479   void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2480 
2481   // Signed rounding halving add.
2482   void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2483 
2484   // Unsigned halving sub.
2485   void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2486 
2487   // Signed halving sub.
2488   void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2489 
2490   // Unsigned saturating add.
2491   void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2492 
2493   // Signed saturating add.
2494   void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2495 
2496   // Unsigned saturating subtract.
2497   void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2498 
2499   // Signed saturating subtract.
2500   void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2501 
2502   // Add pairwise.
2503   void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2504 
2505   // Add pair of elements scalar.
2506   void addp(const VRegister& vd, const VRegister& vn);
2507 
2508   // Multiply-add to accumulator.
2509   void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2510 
2511   // Multiply-subtract to accumulator.
2512   void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2513 
2514   // Multiply.
2515   void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2516 
2517   // Multiply by scalar element.
2518   void mul(const VRegister& vd,
2519            const VRegister& vn,
2520            const VRegister& vm,
2521            int vm_index);
2522 
2523   // Multiply-add by scalar element.
2524   void mla(const VRegister& vd,
2525            const VRegister& vn,
2526            const VRegister& vm,
2527            int vm_index);
2528 
2529   // Multiply-subtract by scalar element.
2530   void mls(const VRegister& vd,
2531            const VRegister& vn,
2532            const VRegister& vm,
2533            int vm_index);
2534 
2535   // Signed long multiply-add by scalar element.
2536   void smlal(const VRegister& vd,
2537              const VRegister& vn,
2538              const VRegister& vm,
2539              int vm_index);
2540 
2541   // Signed long multiply-add by scalar element (second part).
2542   void smlal2(const VRegister& vd,
2543               const VRegister& vn,
2544               const VRegister& vm,
2545               int vm_index);
2546 
2547   // Unsigned long multiply-add by scalar element.
2548   void umlal(const VRegister& vd,
2549              const VRegister& vn,
2550              const VRegister& vm,
2551              int vm_index);
2552 
2553   // Unsigned long multiply-add by scalar element (second part).
2554   void umlal2(const VRegister& vd,
2555               const VRegister& vn,
2556               const VRegister& vm,
2557               int vm_index);
2558 
2559   // Signed long multiply-sub by scalar element.
2560   void smlsl(const VRegister& vd,
2561              const VRegister& vn,
2562              const VRegister& vm,
2563              int vm_index);
2564 
2565   // Signed long multiply-sub by scalar element (second part).
2566   void smlsl2(const VRegister& vd,
2567               const VRegister& vn,
2568               const VRegister& vm,
2569               int vm_index);
2570 
2571   // Unsigned long multiply-sub by scalar element.
2572   void umlsl(const VRegister& vd,
2573              const VRegister& vn,
2574              const VRegister& vm,
2575              int vm_index);
2576 
2577   // Unsigned long multiply-sub by scalar element (second part).
2578   void umlsl2(const VRegister& vd,
2579               const VRegister& vn,
2580               const VRegister& vm,
2581               int vm_index);
2582 
2583   // Signed long multiply by scalar element.
2584   void smull(const VRegister& vd,
2585              const VRegister& vn,
2586              const VRegister& vm,
2587              int vm_index);
2588 
2589   // Signed long multiply by scalar element (second part).
2590   void smull2(const VRegister& vd,
2591               const VRegister& vn,
2592               const VRegister& vm,
2593               int vm_index);
2594 
2595   // Unsigned long multiply by scalar element.
2596   void umull(const VRegister& vd,
2597              const VRegister& vn,
2598              const VRegister& vm,
2599              int vm_index);
2600 
2601   // Unsigned long multiply by scalar element (second part).
2602   void umull2(const VRegister& vd,
2603               const VRegister& vn,
2604               const VRegister& vm,
2605               int vm_index);
2606 
2607   // Signed saturating double long multiply by element.
2608   void sqdmull(const VRegister& vd,
2609                const VRegister& vn,
2610                const VRegister& vm,
2611                int vm_index);
2612 
2613   // Signed saturating double long multiply by element (second part).
2614   void sqdmull2(const VRegister& vd,
2615                 const VRegister& vn,
2616                 const VRegister& vm,
2617                 int vm_index);
2618 
2619   // Signed saturating doubling long multiply-add by element.
2620   void sqdmlal(const VRegister& vd,
2621                const VRegister& vn,
2622                const VRegister& vm,
2623                int vm_index);
2624 
2625   // Signed saturating doubling long multiply-add by element (second part).
2626   void sqdmlal2(const VRegister& vd,
2627                 const VRegister& vn,
2628                 const VRegister& vm,
2629                 int vm_index);
2630 
2631   // Signed saturating doubling long multiply-sub by element.
2632   void sqdmlsl(const VRegister& vd,
2633                const VRegister& vn,
2634                const VRegister& vm,
2635                int vm_index);
2636 
2637   // Signed saturating doubling long multiply-sub by element (second part).
2638   void sqdmlsl2(const VRegister& vd,
2639                 const VRegister& vn,
2640                 const VRegister& vm,
2641                 int vm_index);
2642 
2643   // Compare equal.
2644   void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2645 
2646   // Compare signed greater than or equal.
2647   void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2648 
2649   // Compare signed greater than.
2650   void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2651 
2652   // Compare unsigned higher.
2653   void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2654 
2655   // Compare unsigned higher or same.
2656   void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2657 
2658   // Compare bitwise test bits nonzero.
2659   void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2660 
2661   // Compare bitwise to zero.
2662   // Compare bitwise equal to zero.
2663 
2664   // Compare signed greater than or equal to zero.
2665   void cmge(const VRegister& vd, const VRegister& vn, int value);
2666 
2667   // Compare signed greater than zero.
2668   void cmgt(const VRegister& vd, const VRegister& vn, int value);
2669 
2670   // Compare signed less than or equal to zero.
2671   void cmle(const VRegister& vd, const VRegister& vn, int value);
2672 
2673   // Compare signed less than zero.
2674   void cmlt(const VRegister& vd, const VRegister& vn, int value);
2675 
2676   // Signed shift left by register.
2677   void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2678 
2679   // Unsigned shift left by register.
2680   void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2681 
2682   // Signed saturating shift left by register.
2683   void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2684 
2685   // Unsigned saturating shift left by register.
2686   void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2687 
2688   // Signed rounding shift left by register.
2689   void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2690 
2691   // Unsigned rounding shift left by register.
2692   void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2693 
2694   // Signed saturating rounding shift left by register.
2695   void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2696 
2697   // Unsigned saturating rounding shift left by register.
2698   void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2699 
2700   // Bitwise and.
2701   void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2702 
2703   // Bitwise or.
2704   void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2705 
2706   // Bitwise or immediate.
2707   void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
2708 
2709   // Move register to register.
2710   void mov(const VRegister& vd, const VRegister& vn);
2711 
2712   // Bitwise orn.
2713   void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2714 
2715   // Bitwise eor.
2716   void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2717 
2718   // Bit clear immediate.
2719   void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
2720 
2721   // Bit clear.
2722   void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2723 
2724   // Bitwise insert if false.
2725   void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2726 
2727   // Bitwise insert if true.
2728   void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2729 
2730   // Bitwise select.
2731   void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2732 
2733   // Polynomial multiply.
2734   void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2735 
2736   // Vector move immediate.
2737   void movi(const VRegister& vd,
2738             const uint64_t imm,
2739             Shift shift = LSL,
2740             const int shift_amount = 0);
2741 
2742   // Bitwise not.
2743   void mvn(const VRegister& vd, const VRegister& vn);
2744 
2745   // Vector move inverted immediate.
2746   void mvni(const VRegister& vd,
2747             const int imm8,
2748             Shift shift = LSL,
2749             const int shift_amount = 0);
2750 
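  // A minimal sketch of movi/mvni (illustrative only, assuming an initialized
  // Assembler `masm`; the encodable immediate/shift pairs depend on the lane
  // arrangement):
  //
  //   masm.movi(v0.V4S(), 0xff, LSL, 8);  // Each S lane = 0x0000ff00.
  //   masm.mvni(v1.V4H(), 0x01);          // Each H lane = ~0x0001 = 0xfffe.
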
2751   // Signed saturating accumulate of unsigned value.
2752   void suqadd(const VRegister& vd, const VRegister& vn);
2753 
2754   // Unsigned saturating accumulate of signed value.
2755   void usqadd(const VRegister& vd, const VRegister& vn);
2756 
2757   // Absolute value.
2758   void abs(const VRegister& vd, const VRegister& vn);
2759 
2760   // Signed saturating absolute value.
2761   void sqabs(const VRegister& vd, const VRegister& vn);
2762 
2763   // Negate.
2764   void neg(const VRegister& vd, const VRegister& vn);
2765 
2766   // Signed saturating negate.
2767   void sqneg(const VRegister& vd, const VRegister& vn);
2768 
2769   // Bitwise not.
2770   void not_(const VRegister& vd, const VRegister& vn);
2771 
2772   // Extract narrow.
2773   void xtn(const VRegister& vd, const VRegister& vn);
2774 
2775   // Extract narrow (second part).
2776   void xtn2(const VRegister& vd, const VRegister& vn);
2777 
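  // Throughout this interface, the "2" (second part) variants operate on the
  // upper half of a 128-bit vector: narrowing forms write the upper half of
  // the destination, widening forms read the upper half of the source. For
  // example (illustrative only, assuming an initialized Assembler `masm`):
  //
  //   masm.xtn(v0.V2S(), v1.V2D());   // Narrow into the low 64 bits of v0.
  //   masm.xtn2(v0.V4S(), v2.V2D());  // Narrow into the high 64 bits of v0.
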
2778   // Signed saturating extract narrow.
2779   void sqxtn(const VRegister& vd, const VRegister& vn);
2780 
2781   // Signed saturating extract narrow (second part).
2782   void sqxtn2(const VRegister& vd, const VRegister& vn);
2783 
2784   // Unsigned saturating extract narrow.
2785   void uqxtn(const VRegister& vd, const VRegister& vn);
2786 
2787   // Unsigned saturating extract narrow (second part).
2788   void uqxtn2(const VRegister& vd, const VRegister& vn);
2789 
2790   // Signed saturating extract unsigned narrow.
2791   void sqxtun(const VRegister& vd, const VRegister& vn);
2792 
2793   // Signed saturating extract unsigned narrow (second part).
2794   void sqxtun2(const VRegister& vd, const VRegister& vn);
2795 
2796   // Extract vector from pair of vectors.
2797   void ext(const VRegister& vd,
2798            const VRegister& vn,
2799            const VRegister& vm,
2800            int index);
2801 
2802   // Duplicate vector element to vector or scalar.
2803   void dup(const VRegister& vd, const VRegister& vn, int vn_index);
2804 
2805   // Move vector element to scalar.
2806   void mov(const VRegister& vd, const VRegister& vn, int vn_index);
2807 
2808   // Duplicate general-purpose register to vector.
2809   void dup(const VRegister& vd, const Register& rn);
2810 
2811   // Insert vector element from another vector element.
2812   void ins(const VRegister& vd,
2813            int vd_index,
2814            const VRegister& vn,
2815            int vn_index);
2816 
2817   // Move vector element to another vector element.
2818   void mov(const VRegister& vd,
2819            int vd_index,
2820            const VRegister& vn,
2821            int vn_index);
2822 
2823   // Insert vector element from general-purpose register.
2824   void ins(const VRegister& vd, int vd_index, const Register& rn);
2825 
2826   // Move general-purpose register to a vector element.
2827   void mov(const VRegister& vd, int vd_index, const Register& rn);
2828 
2829   // Unsigned move vector element to general-purpose register.
2830   void umov(const Register& rd, const VRegister& vn, int vn_index);
2831 
2832   // Move vector element to general-purpose register.
2833   void mov(const Register& rd, const VRegister& vn, int vn_index);
2834 
2835   // Signed move vector element to general-purpose register.
2836   void smov(const Register& rd, const VRegister& vn, int vn_index);
2837 
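  // The mov forms above are aliases of dup/ins/umov. Illustrative sketch
  // (assuming an initialized Assembler `masm`):
  //
  //   masm.dup(s0, v1.V4S(), 2);           // Scalar s0 = lane 2 of v1.
  //   masm.ins(v0.V4S(), 0, v2.V4S(), 3);  // Lane 0 of v0 = lane 3 of v2.
  //   masm.mov(w0, v0.V4S(), 1);           // Same encoding as umov.
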
2838   // One-element structure load to one register.
2839   void ld1(const VRegister& vt, const MemOperand& src);
2840 
2841   // One-element structure load to two registers.
2842   void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2843 
2844   // One-element structure load to three registers.
2845   void ld1(const VRegister& vt,
2846            const VRegister& vt2,
2847            const VRegister& vt3,
2848            const MemOperand& src);
2849 
2850   // One-element structure load to four registers.
2851   void ld1(const VRegister& vt,
2852            const VRegister& vt2,
2853            const VRegister& vt3,
2854            const VRegister& vt4,
2855            const MemOperand& src);
2856 
2857   // One-element single structure load to one lane.
2858   void ld1(const VRegister& vt, int lane, const MemOperand& src);
2859 
2860   // One-element single structure load to all lanes.
2861   void ld1r(const VRegister& vt, const MemOperand& src);
2862 
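  // A minimal load sketch (illustrative only, assuming an initialized
  // Assembler `masm` and a valid address in x0). Post-index forms write the
  // updated address back to the base register:
  //
  //   masm.ld1(v0.V16B(), MemOperand(x0));  // Load 16 bytes, no writeback.
  //   masm.ld1(v1.V16B(), v2.V16B(), MemOperand(x0, 32, PostIndex));
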
2863   // Two-element structure load.
2864   void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2865 
2866   // Two-element single structure load to one lane.
2867   void ld2(const VRegister& vt,
2868            const VRegister& vt2,
2869            int lane,
2870            const MemOperand& src);
2871 
2872   // Two-element single structure load to all lanes.
2873   void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2874 
2875   // Three-element structure load.
2876   void ld3(const VRegister& vt,
2877            const VRegister& vt2,
2878            const VRegister& vt3,
2879            const MemOperand& src);
2880 
2881   // Three-element single structure load to one lane.
2882   void ld3(const VRegister& vt,
2883            const VRegister& vt2,
2884            const VRegister& vt3,
2885            int lane,
2886            const MemOperand& src);
2887 
2888   // Three-element single structure load to all lanes.
2889   void ld3r(const VRegister& vt,
2890             const VRegister& vt2,
2891             const VRegister& vt3,
2892             const MemOperand& src);
2893 
2894   // Four-element structure load.
2895   void ld4(const VRegister& vt,
2896            const VRegister& vt2,
2897            const VRegister& vt3,
2898            const VRegister& vt4,
2899            const MemOperand& src);
2900 
2901   // Four-element single structure load to one lane.
2902   void ld4(const VRegister& vt,
2903            const VRegister& vt2,
2904            const VRegister& vt3,
2905            const VRegister& vt4,
2906            int lane,
2907            const MemOperand& src);
2908 
2909   // Four-element single structure load to all lanes.
2910   void ld4r(const VRegister& vt,
2911             const VRegister& vt2,
2912             const VRegister& vt3,
2913             const VRegister& vt4,
2914             const MemOperand& src);
2915 
2916   // Count leading sign bits.
2917   void cls(const VRegister& vd, const VRegister& vn);
2918 
2919   // Count leading zero bits (vector).
2920   void clz(const VRegister& vd, const VRegister& vn);
2921 
2922   // Population count per byte.
2923   void cnt(const VRegister& vd, const VRegister& vn);
2924 
2925   // Reverse bit order.
2926   void rbit(const VRegister& vd, const VRegister& vn);
2927 
2928   // Reverse elements in 16-bit halfwords.
2929   void rev16(const VRegister& vd, const VRegister& vn);
2930 
2931   // Reverse elements in 32-bit words.
2932   void rev32(const VRegister& vd, const VRegister& vn);
2933 
2934   // Reverse elements in 64-bit doublewords.
2935   void rev64(const VRegister& vd, const VRegister& vn);
2936 
2937   // Unsigned reciprocal square root estimate.
2938   void ursqrte(const VRegister& vd, const VRegister& vn);
2939 
2940   // Unsigned reciprocal estimate.
2941   void urecpe(const VRegister& vd, const VRegister& vn);
2942 
2943   // Signed pairwise long add.
2944   void saddlp(const VRegister& vd, const VRegister& vn);
2945 
2946   // Unsigned pairwise long add.
2947   void uaddlp(const VRegister& vd, const VRegister& vn);
2948 
2949   // Signed pairwise long add and accumulate.
2950   void sadalp(const VRegister& vd, const VRegister& vn);
2951 
2952   // Unsigned pairwise long add and accumulate.
2953   void uadalp(const VRegister& vd, const VRegister& vn);
2954 
2955   // Shift left by immediate.
2956   void shl(const VRegister& vd, const VRegister& vn, int shift);
2957 
2958   // Signed saturating shift left by immediate.
2959   void sqshl(const VRegister& vd, const VRegister& vn, int shift);
2960 
2961   // Signed saturating shift left unsigned by immediate.
2962   void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
2963 
2964   // Unsigned saturating shift left by immediate.
2965   void uqshl(const VRegister& vd, const VRegister& vn, int shift);
2966 
2967   // Signed shift left long by immediate.
2968   void sshll(const VRegister& vd, const VRegister& vn, int shift);
2969 
2970   // Signed shift left long by immediate (second part).
2971   void sshll2(const VRegister& vd, const VRegister& vn, int shift);
2972 
2973   // Signed extend long.
2974   void sxtl(const VRegister& vd, const VRegister& vn);
2975 
2976   // Signed extend long (second part).
2977   void sxtl2(const VRegister& vd, const VRegister& vn);
2978 
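  // sxtl/uxtl (and their "2" forms) are aliases of sshll/ushll with a zero
  // shift, e.g. (illustrative only, assuming an initialized Assembler `masm`):
  //
  //   masm.sxtl(v0.V8H(), v1.V8B());  // Same as sshll(v0.V8H(), v1.V8B(), 0).
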
2979   // Unsigned shift left long by immediate.
2980   void ushll(const VRegister& vd, const VRegister& vn, int shift);
2981 
2982   // Unsigned shift left long by immediate (second part).
2983   void ushll2(const VRegister& vd, const VRegister& vn, int shift);
2984 
2985   // Shift left long by element size.
2986   void shll(const VRegister& vd, const VRegister& vn, int shift);
2987 
2988   // Shift left long by element size (second part).
2989   void shll2(const VRegister& vd, const VRegister& vn, int shift);
2990 
2991   // Unsigned extend long.
2992   void uxtl(const VRegister& vd, const VRegister& vn);
2993 
2994   // Unsigned extend long (second part).
2995   void uxtl2(const VRegister& vd, const VRegister& vn);
2996 
2997   // Shift left by immediate and insert.
2998   void sli(const VRegister& vd, const VRegister& vn, int shift);
2999 
3000   // Shift right by immediate and insert.
3001   void sri(const VRegister& vd, const VRegister& vn, int shift);
3002 
3003   // Signed maximum.
3004   void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3005 
3006   // Signed pairwise maximum.
3007   void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3008 
3009   // Add across vector.
3010   void addv(const VRegister& vd, const VRegister& vn);
3011 
3012   // Signed add long across vector.
3013   void saddlv(const VRegister& vd, const VRegister& vn);
3014 
3015   // Unsigned add long across vector.
3016   void uaddlv(const VRegister& vd, const VRegister& vn);
3017 
3018   // FP maximum number across vector.
3019   void fmaxnmv(const VRegister& vd, const VRegister& vn);
3020 
3021   // FP maximum across vector.
3022   void fmaxv(const VRegister& vd, const VRegister& vn);
3023 
3024   // FP minimum number across vector.
3025   void fminnmv(const VRegister& vd, const VRegister& vn);
3026 
3027   // FP minimum across vector.
3028   void fminv(const VRegister& vd, const VRegister& vn);
3029 
3030   // Signed maximum across vector.
3031   void smaxv(const VRegister& vd, const VRegister& vn);
3032 
3033   // Signed minimum.
3034   void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3035 
3036   // Signed pairwise minimum.
3037   void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3038 
3039   // Signed minimum across vector.
3040   void sminv(const VRegister& vd, const VRegister& vn);
3041 
3042   // One-element structure store from one register.
3043   void st1(const VRegister& vt, const MemOperand& src);
3044 
3045   // One-element structure store from two registers.
3046   void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3047 
3048   // One-element structure store from three registers.
3049   void st1(const VRegister& vt,
3050            const VRegister& vt2,
3051            const VRegister& vt3,
3052            const MemOperand& src);
3053 
3054   // One-element structure store from four registers.
3055   void st1(const VRegister& vt,
3056            const VRegister& vt2,
3057            const VRegister& vt3,
3058            const VRegister& vt4,
3059            const MemOperand& src);
3060 
3061   // One-element single structure store from one lane.
3062   void st1(const VRegister& vt, int lane, const MemOperand& src);
3063 
3064   // Two-element structure store from two registers.
3065   void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3066 
3067   // Two-element single structure store from two lanes.
3068   void st2(const VRegister& vt,
3069            const VRegister& vt2,
3070            int lane,
3071            const MemOperand& src);
3072 
3073   // Three-element structure store from three registers.
3074   void st3(const VRegister& vt,
3075            const VRegister& vt2,
3076            const VRegister& vt3,
3077            const MemOperand& src);
3078 
3079   // Three-element single structure store from three lanes.
3080   void st3(const VRegister& vt,
3081            const VRegister& vt2,
3082            const VRegister& vt3,
3083            int lane,
3084            const MemOperand& src);
3085 
3086   // Four-element structure store from four registers.
3087   void st4(const VRegister& vt,
3088            const VRegister& vt2,
3089            const VRegister& vt3,
3090            const VRegister& vt4,
3091            const MemOperand& src);
3092 
3093   // Four-element single structure store from four lanes.
3094   void st4(const VRegister& vt,
3095            const VRegister& vt2,
3096            const VRegister& vt3,
3097            const VRegister& vt4,
3098            int lane,
3099            const MemOperand& src);
3100 
3101   // Unsigned add long.
3102   void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3103 
3104   // Unsigned add long (second part).
3105   void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3106 
3107   // Unsigned add wide.
3108   void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3109 
3110   // Unsigned add wide (second part).
3111   void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3112 
3113   // Signed add long.
3114   void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3115 
3116   // Signed add long (second part).
3117   void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3118 
3119   // Signed add wide.
3120   void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3121 
3122   // Signed add wide (second part).
3123   void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3124 
3125   // Unsigned subtract long.
3126   void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3127 
3128   // Unsigned subtract long (second part).
3129   void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3130 
3131   // Unsigned subtract wide.
3132   void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3133 
3134   // Unsigned subtract wide (second part).
3135   void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3136 
3137   // Signed subtract long.
3138   void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3139 
3140   // Signed subtract long (second part).
3141   void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3142 
3143   // Signed subtract wide.
3144   void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3145 
3146   // Signed subtract wide (second part).
3147   void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3148 
3149   // Unsigned maximum.
3150   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3151 
3152   // Unsigned pairwise maximum.
3153   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3154 
3155   // Unsigned maximum across vector.
3156   void umaxv(const VRegister& vd, const VRegister& vn);
3157 
3158   // Unsigned minimum.
3159   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3160 
3161   // Unsigned pairwise minimum.
3162   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3163 
3164   // Unsigned minimum across vector.
3165   void uminv(const VRegister& vd, const VRegister& vn);
3166 
3167   // Transpose vectors (primary).
3168   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3169 
3170   // Transpose vectors (secondary).
3171   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3172 
3173   // Unzip vectors (primary).
3174   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3175 
3176   // Unzip vectors (secondary).
3177   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3178 
3179   // Zip vectors (primary).
3180   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3181 
3182   // Zip vectors (secondary).
3183   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3184 
3185   // Signed shift right by immediate.
3186   void sshr(const VRegister& vd, const VRegister& vn, int shift);
3187 
3188   // Unsigned shift right by immediate.
3189   void ushr(const VRegister& vd, const VRegister& vn, int shift);
3190 
3191   // Signed rounding shift right by immediate.
3192   void srshr(const VRegister& vd, const VRegister& vn, int shift);
3193 
3194   // Unsigned rounding shift right by immediate.
3195   void urshr(const VRegister& vd, const VRegister& vn, int shift);
3196 
3197   // Signed shift right by immediate and accumulate.
3198   void ssra(const VRegister& vd, const VRegister& vn, int shift);
3199 
3200   // Unsigned shift right by immediate and accumulate.
3201   void usra(const VRegister& vd, const VRegister& vn, int shift);
3202 
3203   // Signed rounding shift right by immediate and accumulate.
3204   void srsra(const VRegister& vd, const VRegister& vn, int shift);
3205 
3206   // Unsigned rounding shift right by immediate and accumulate.
3207   void ursra(const VRegister& vd, const VRegister& vn, int shift);
3208 
3209   // Shift right narrow by immediate.
3210   void shrn(const VRegister& vd, const VRegister& vn, int shift);
3211 
3212   // Shift right narrow by immediate (second part).
3213   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
3214 
3215   // Rounding shift right narrow by immediate.
3216   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
3217 
3218   // Rounding shift right narrow by immediate (second part).
3219   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
3220 
3221   // Unsigned saturating shift right narrow by immediate.
3222   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
3223 
3224   // Unsigned saturating shift right narrow by immediate (second part).
3225   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3226 
3227   // Unsigned saturating rounding shift right narrow by immediate.
3228   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3229 
3230   // Unsigned saturating rounding shift right narrow by immediate (second part).
3231   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3232 
3233   // Signed saturating shift right narrow by immediate.
3234   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
3235 
3236   // Signed saturating shift right narrow by immediate (second part).
3237   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3238 
3239   // Signed saturating rounded shift right narrow by immediate.
3240   void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3241 
3242   // Signed saturating rounded shift right narrow by immediate (second part).
3243   void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3244 
3245   // Signed saturating shift right unsigned narrow by immediate.
3246   void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
3247 
3248   // Signed saturating shift right unsigned narrow by immediate (second part).
3249   void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
3250 
3251   // Signed saturating rounded shift right unsigned narrow by immediate.
3252   void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
3253 
3254   // Signed saturating rounded shift right unsigned narrow by immediate (second part).
3255   void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3256 
3257   // FP reciprocal step.
3258   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3259 
3260   // FP reciprocal estimate.
3261   void frecpe(const VRegister& vd, const VRegister& vn);
3262 
3263   // FP reciprocal square root estimate.
3264   void frsqrte(const VRegister& vd, const VRegister& vn);
3265 
3266   // FP reciprocal square root step.
3267   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3268 
3269   // Signed absolute difference and accumulate long.
3270   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3271 
3272   // Signed absolute difference and accumulate long (second part).
3273   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3274 
3275   // Unsigned absolute difference and accumulate long.
3276   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3277 
3278   // Unsigned absolute difference and accumulate long (second part).
3279   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3280 
3281   // Signed absolute difference long.
3282   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3283 
3284   // Signed absolute difference long (second part).
3285   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3286 
3287   // Unsigned absolute difference long.
3288   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3289 
3290   // Unsigned absolute difference long (second part).
3291   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3292 
3293   // Polynomial multiply long.
3294   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3295 
3296   // Polynomial multiply long (second part).
3297   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3298 
3299   // Signed long multiply-add.
3300   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3301 
3302   // Signed long multiply-add (second part).
3303   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3304 
3305   // Unsigned long multiply-add.
3306   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3307 
3308   // Unsigned long multiply-add (second part).
3309   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3310 
3311   // Signed long multiply-subtract.
3312   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3313 
3314   // Signed long multiply-subtract (second part).
3315   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3316 
3317   // Unsigned long multiply-subtract.
3318   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3319 
3320   // Unsigned long multiply-subtract (second part).
3321   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3322 
3323   // Signed long multiply.
3324   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3325 
3326   // Signed long multiply (second part).
3327   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3328 
3329   // Signed saturating doubling long multiply-add.
3330   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3331 
3332   // Signed saturating doubling long multiply-add (second part).
3333   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3334 
3335   // Signed saturating doubling long multiply-subtract.
3336   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3337 
3338   // Signed saturating doubling long multiply-subtract (second part).
3339   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3340 
3341   // Signed saturating doubling long multiply.
3342   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3343 
3344   // Signed saturating doubling long multiply (second part).
3345   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3346 
3347   // Signed saturating doubling multiply returning high half.
3348   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3349 
3350   // Signed saturating rounding doubling multiply returning high half.
3351   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3352 
3353   // Signed dot product [Armv8.2].
3354   void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3355 
3356   // Signed saturating rounding doubling multiply accumulate returning high
3357   // half [Armv8.1].
3358   void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3359 
3360   // Unsigned dot product [Armv8.2].
3361   void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3362 
3363   // Signed saturating rounding doubling multiply subtract returning high half
3364   // [Armv8.1].
3365   void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3366 
3367   // Signed saturating doubling multiply element returning high half.
3368   void sqdmulh(const VRegister& vd,
3369                const VRegister& vn,
3370                const VRegister& vm,
3371                int vm_index);
3372 
3373   // Signed saturating rounding doubling multiply element returning high half.
3374   void sqrdmulh(const VRegister& vd,
3375                 const VRegister& vn,
3376                 const VRegister& vm,
3377                 int vm_index);
3378 
3379   // Signed dot product by element [Armv8.2].
3380   void sdot(const VRegister& vd,
3381             const VRegister& vn,
3382             const VRegister& vm,
3383             int vm_index);
3384 
3385   // Signed saturating rounding doubling multiply accumulate element returning
3386   // high half [Armv8.1].
3387   void sqrdmlah(const VRegister& vd,
3388                 const VRegister& vn,
3389                 const VRegister& vm,
3390                 int vm_index);
3391 
3392   // Unsigned dot product by element [Armv8.2].
3393   void udot(const VRegister& vd,
3394             const VRegister& vn,
3395             const VRegister& vm,
3396             int vm_index);
3397 
3398   // Signed saturating rounding doubling multiply subtract element returning
3399   // high half [Armv8.1].
3400   void sqrdmlsh(const VRegister& vd,
3401                 const VRegister& vn,
3402                 const VRegister& vm,
3403                 int vm_index);
3404 
3405   // Unsigned long multiply.
3406   void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3407 
3408   // Unsigned long multiply (second part).
3409   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3410 
3411   // Add narrow returning high half.
3412   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3413 
3414   // Add narrow returning high half (second part).
3415   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3416 
3417   // Rounding add narrow returning high half.
3418   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3419 
3420   // Rounding add narrow returning high half (second part).
3421   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3422 
3423   // Subtract narrow returning high half.
3424   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3425 
3426   // Subtract narrow returning high half (second part).
3427   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3428 
3429   // Rounding subtract narrow returning high half.
3430   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3431 
3432   // Rounding subtract narrow returning high half (second part).
3433   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3434 
3435   // FP vector multiply accumulate.
3436   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3437 
3438   // FP fused multiply-add long to accumulator.
3439   void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3440 
3441   // FP fused multiply-add long to accumulator (second part).
3442   void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3443 
3444   // FP fused multiply-add long to accumulator by element.
3445   void fmlal(const VRegister& vd,
3446              const VRegister& vn,
3447              const VRegister& vm,
3448              int vm_index);
3449 
3450   // FP fused multiply-add long to accumulator by element (second part).
3451   void fmlal2(const VRegister& vd,
3452               const VRegister& vn,
3453               const VRegister& vm,
3454               int vm_index);
3455 
3456   // FP vector multiply subtract.
3457   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3458 
3459   // FP fused multiply-subtract long to accumulator.
3460   void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3461 
3462   // FP fused multiply-subtract long to accumulator (second part).
3463   void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3464 
3465   // FP fused multiply-subtract long to accumulator by element.
3466   void fmlsl(const VRegister& vd,
3467              const VRegister& vn,
3468              const VRegister& vm,
3469              int vm_index);
3470 
3471   // FP fused multiply-subtract long to accumulator by element (second part).
3472   void fmlsl2(const VRegister& vd,
3473               const VRegister& vn,
3474               const VRegister& vm,
3475               int vm_index);
3476 
3477   // FP vector multiply extended.
3478   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3479 
3480   // FP absolute greater than or equal.
3481   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3482 
3483   // FP absolute greater than.
3484   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3485 
3486   // FP multiply by element.
3487   void fmul(const VRegister& vd,
3488             const VRegister& vn,
3489             const VRegister& vm,
3490             int vm_index);
3491 
3492   // FP fused multiply-add to accumulator by element.
3493   void fmla(const VRegister& vd,
3494             const VRegister& vn,
3495             const VRegister& vm,
3496             int vm_index);
3497 
3498   // FP fused multiply-sub from accumulator by element.
3499   void fmls(const VRegister& vd,
3500             const VRegister& vn,
3501             const VRegister& vm,
3502             int vm_index);
3503 
3504   // FP multiply extended by element.
3505   void fmulx(const VRegister& vd,
3506              const VRegister& vn,
3507              const VRegister& vm,
3508              int vm_index);
3509 
3510   // FP compare equal.
3511   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3512 
3513   // FP greater than.
3514   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3515 
3516   // FP greater than or equal.
3517   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3518 
3519   // FP compare equal to zero.
3520   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3521 
3522   // FP greater than zero.
3523   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3524 
3525   // FP greater than or equal to zero.
3526   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3527 
3528   // FP less than or equal to zero.
3529   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3530 
3531   // FP less than zero.
3532   void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3533 
3534   // FP absolute difference.
3535   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3536 
3537   // FP pairwise add vector.
3538   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3539 
3540   // FP pairwise add scalar.
3541   void faddp(const VRegister& vd, const VRegister& vn);
3542 
3543   // FP pairwise maximum vector.
3544   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3545 
3546   // FP pairwise maximum scalar.
3547   void fmaxp(const VRegister& vd, const VRegister& vn);
3548 
3549   // FP pairwise minimum vector.
3550   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3551 
3552   // FP pairwise minimum scalar.
3553   void fminp(const VRegister& vd, const VRegister& vn);
3554 
3555   // FP pairwise maximum number vector.
3556   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3557 
3558   // FP pairwise maximum number scalar.
3559   void fmaxnmp(const VRegister& vd, const VRegister& vn);
3560 
3561   // FP pairwise minimum number vector.
3562   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3563 
3564   // FP pairwise minimum number scalar.
3565   void fminnmp(const VRegister& vd, const VRegister& vn);
3566 
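  // The scalar pairwise forms above reduce the two lanes of one source, while
  // the vector forms operate pairwise across the concatenated sources.
  // Illustrative sketch (assuming an initialized Assembler `masm`):
  //
  //   masm.faddp(d0, v1.V2D());                  // d0 = v1[0] + v1[1].
  //   masm.faddp(v0.V4S(), v1.V4S(), v2.V4S());  // Pairwise sums of v1:v2.
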
3567   // Armv8.3 complex-number helpers. These are partial operations and must
3568   // be used in series for full complex arithmetic (sketched after fcadd).
3569 
3570   // FP complex multiply accumulate (by element) [Armv8.3].
3571   void fcmla(const VRegister& vd,
3572              const VRegister& vn,
3573              const VRegister& vm,
3574              int vm_index,
3575              int rot);
3576 
3577   // FP complex multiply accumulate [Armv8.3].
3578   void fcmla(const VRegister& vd,
3579              const VRegister& vn,
3580              const VRegister& vm,
3581              int rot);
3582 
3583   // FP complex add [Armv8.3].
3584   void fcadd(const VRegister& vd,
3585              const VRegister& vn,
3586              const VRegister& vm,
3587              int rot);
3588 
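  // A minimal sketch of a full complex multiply-accumulate built from the
  // helpers above (illustrative only, assuming an initialized Assembler
  // `masm` and real/imaginary parts interleaved in adjacent lanes):
  //
  //   masm.fcmla(v0.V4S(), v1.V4S(), v2.V4S(), 0);
  //   masm.fcmla(v0.V4S(), v1.V4S(), v2.V4S(), 90);
  //   // Together these accumulate the complex product v1 * v2 into v0.
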
3589   // Scalable Vector Extension (SVE).
3590 
3591   // Absolute value (predicated).
3592   void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3593 
3594   // Add vectors (predicated).
3595   void add(const ZRegister& zd,
3596            const PRegisterM& pg,
3597            const ZRegister& zn,
3598            const ZRegister& zm);
3599 
3600   // Add vectors (unpredicated).
3601   void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3602 
3603   // Add immediate (unpredicated).
3604   void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3605 
3606   // Add multiple of predicate register size to scalar register.
3607   void addpl(const Register& xd, const Register& xn, int imm6);
3608 
3609   // Add multiple of vector register size to scalar register.
3610   void addvl(const Register& xd, const Register& xn, int imm6);
3611 
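  // These scale the offset by the runtime vector or predicate register size.
  // Illustrative sketch (assuming an initialized Assembler `masm`):
  //
  //   masm.addvl(x0, x1, 2);   // x0 = x1 + 2 * (vector length in bytes).
  //   masm.addpl(x2, x3, -1);  // x2 = x3 - (predicate length in bytes).
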
3612   // Compute vector address.
3613   void adr(const ZRegister& zd, const SVEMemOperand& addr);
3614 
3615   // Bitwise AND predicates.
3616   void and_(const PRegisterWithLaneSize& pd,
3617             const PRegisterZ& pg,
3618             const PRegisterWithLaneSize& pn,
3619             const PRegisterWithLaneSize& pm);
3620 
3621   // Bitwise AND vectors (predicated).
3622   void and_(const ZRegister& zd,
3623             const PRegisterM& pg,
3624             const ZRegister& zn,
3625             const ZRegister& zm);
3626 
3627   // Bitwise AND with immediate (unpredicated).
3628   void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3629 
3630   // Bitwise AND vectors (unpredicated).
3631   void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3632 
3633   // Bitwise AND predicates, setting the condition flags.
3634   void ands(const PRegisterWithLaneSize& pd,
3635             const PRegisterZ& pg,
3636             const PRegisterWithLaneSize& pn,
3637             const PRegisterWithLaneSize& pm);
3638 
3639   // Bitwise AND reduction to scalar.
3640   void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3641 
3642   // Arithmetic shift right by immediate (predicated).
3643   void asr(const ZRegister& zd,
3644            const PRegisterM& pg,
3645            const ZRegister& zn,
3646            int shift);
3647 
3648   // Arithmetic shift right by 64-bit wide elements (predicated).
3649   void asr(const ZRegister& zd,
3650            const PRegisterM& pg,
3651            const ZRegister& zn,
3652            const ZRegister& zm);
3653 
3654   // Arithmetic shift right by immediate (unpredicated).
3655   void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3656 
3657   // Arithmetic shift right by 64-bit wide elements (unpredicated).
3658   void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3659 
3660   // Arithmetic shift right for divide by immediate (predicated).
3661   void asrd(const ZRegister& zd,
3662             const PRegisterM& pg,
3663             const ZRegister& zn,
3664             int shift);
3665 
3666   // Reversed arithmetic shift right by vector (predicated).
3667   void asrr(const ZRegister& zd,
3668             const PRegisterM& pg,
3669             const ZRegister& zn,
3670             const ZRegister& zm);
3671 
3672   // Bitwise clear predicates.
3673   void bic(const PRegisterWithLaneSize& pd,
3674            const PRegisterZ& pg,
3675            const PRegisterWithLaneSize& pn,
3676            const PRegisterWithLaneSize& pm);
3677 
3678   // Bitwise clear vectors (predicated).
3679   void bic(const ZRegister& zd,
3680            const PRegisterM& pg,
3681            const ZRegister& zn,
3682            const ZRegister& zm);
3683 
3684   // Bitwise clear bits using immediate (unpredicated).
3685   void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3686 
3687   // Bitwise clear vectors (unpredicated).
3688   void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3689 
3690   // Bitwise clear predicates, setting the condition flags.
3691   void bics(const PRegisterWithLaneSize& pd,
3692             const PRegisterZ& pg,
3693             const PRegisterWithLaneSize& pn,
3694             const PRegisterWithLaneSize& pm);
3695 
3696   // Break after first true condition.
3697   void brka(const PRegisterWithLaneSize& pd,
3698             const PRegister& pg,
3699             const PRegisterWithLaneSize& pn);
3700 
3701   // Break after first true condition, setting the condition flags.
3702   void brkas(const PRegisterWithLaneSize& pd,
3703              const PRegisterZ& pg,
3704              const PRegisterWithLaneSize& pn);
3705 
3706   // Break before first true condition.
3707   void brkb(const PRegisterWithLaneSize& pd,
3708             const PRegister& pg,
3709             const PRegisterWithLaneSize& pn);
3710 
3711   // Break before first true condition, setting the condition flags.
3712   void brkbs(const PRegisterWithLaneSize& pd,
3713              const PRegisterZ& pg,
3714              const PRegisterWithLaneSize& pn);
3715 
3716   // Propagate break to next partition.
3717   void brkn(const PRegisterWithLaneSize& pd,
3718             const PRegisterZ& pg,
3719             const PRegisterWithLaneSize& pn,
3720             const PRegisterWithLaneSize& pm);
3721 
3722   // Propagate break to next partition, setting the condition flags.
3723   void brkns(const PRegisterWithLaneSize& pd,
3724              const PRegisterZ& pg,
3725              const PRegisterWithLaneSize& pn,
3726              const PRegisterWithLaneSize& pm);
3727 
3728   // Break after first true condition, propagating from previous partition.
3729   void brkpa(const PRegisterWithLaneSize& pd,
3730              const PRegisterZ& pg,
3731              const PRegisterWithLaneSize& pn,
3732              const PRegisterWithLaneSize& pm);
3733 
3734   // As brkpa, but setting the condition flags.
3735   void brkpas(const PRegisterWithLaneSize& pd,
3736               const PRegisterZ& pg,
3737               const PRegisterWithLaneSize& pn,
3738               const PRegisterWithLaneSize& pm);
3739 
3740   // Break before first true condition, propagating from previous partition.
3741   void brkpb(const PRegisterWithLaneSize& pd,
3742              const PRegisterZ& pg,
3743              const PRegisterWithLaneSize& pn,
3744              const PRegisterWithLaneSize& pm);
3745 
3746   // As brkpb, but setting the condition flags.
3747   void brkpbs(const PRegisterWithLaneSize& pd,
3748               const PRegisterZ& pg,
3749               const PRegisterWithLaneSize& pn,
3750               const PRegisterWithLaneSize& pm);
3751 
3752   // Conditionally extract element after last to general-purpose register.
3753   void clasta(const Register& rd,
3754               const PRegister& pg,
3755               const Register& rn,
3756               const ZRegister& zm);
3757 
3758   // Conditionally extract element after last to SIMD&FP scalar register.
3759   void clasta(const VRegister& vd,
3760               const PRegister& pg,
3761               const VRegister& vn,
3762               const ZRegister& zm);
3763 
3764   // Conditionally extract element after last to vector register.
3765   void clasta(const ZRegister& zd,
3766               const PRegister& pg,
3767               const ZRegister& zn,
3768               const ZRegister& zm);
3769 
3770   // Conditionally extract last element to general-purpose register.
3771   void clastb(const Register& rd,
3772               const PRegister& pg,
3773               const Register& rn,
3774               const ZRegister& zm);
3775 
3776   // Conditionally extract last element to SIMD&FP scalar register.
3777   void clastb(const VRegister& vd,
3778               const PRegister& pg,
3779               const VRegister& vn,
3780               const ZRegister& zm);
3781 
3782   // Conditionally extract last element to vector register.
3783   void clastb(const ZRegister& zd,
3784               const PRegister& pg,
3785               const ZRegister& zn,
3786               const ZRegister& zm);
3787 
3788   // Count leading sign bits (predicated).
3789   void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3790 
3791   // Count leading zero bits (predicated).
3792   void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3793 
  // Compare vectors, with the comparison selected by `cond`.
3794   void cmp(Condition cond,
3795            const PRegisterWithLaneSize& pd,
3796            const PRegisterZ& pg,
3797            const ZRegister& zn,
3798            const ZRegister& zm);
3799 
3800   // Compare vector to 64-bit wide elements.
3801   void cmpeq(const PRegisterWithLaneSize& pd,
3802              const PRegisterZ& pg,
3803              const ZRegister& zn,
3804              const ZRegister& zm);
3805 
3806   // Compare vector to immediate.
3807   void cmpeq(const PRegisterWithLaneSize& pd,
3808              const PRegisterZ& pg,
3809              const ZRegister& zn,
3810              int imm5);
3811 
3812   // Compare vector to 64-bit wide elements.
3813   void cmpge(const PRegisterWithLaneSize& pd,
3814              const PRegisterZ& pg,
3815              const ZRegister& zn,
3816              const ZRegister& zm);
3817 
3818   // Compare vector to immediate.
3819   void cmpge(const PRegisterWithLaneSize& pd,
3820              const PRegisterZ& pg,
3821              const ZRegister& zn,
3822              int imm5);
3823 
3824   // Compare vector to 64-bit wide elements.
3825   void cmpgt(const PRegisterWithLaneSize& pd,
3826              const PRegisterZ& pg,
3827              const ZRegister& zn,
3828              const ZRegister& zm);
3829 
3830   // Compare vector to immediate.
3831   void cmpgt(const PRegisterWithLaneSize& pd,
3832              const PRegisterZ& pg,
3833              const ZRegister& zn,
3834              int imm5);
3835 
3836   // Compare vector to 64-bit wide elements.
3837   void cmphi(const PRegisterWithLaneSize& pd,
3838              const PRegisterZ& pg,
3839              const ZRegister& zn,
3840              const ZRegister& zm);
3841 
3842   // Compare vector to immediate.
3843   void cmphi(const PRegisterWithLaneSize& pd,
3844              const PRegisterZ& pg,
3845              const ZRegister& zn,
3846              unsigned imm7);
3847 
3848   // Compare vector to 64-bit wide elements.
3849   void cmphs(const PRegisterWithLaneSize& pd,
3850              const PRegisterZ& pg,
3851              const ZRegister& zn,
3852              const ZRegister& zm);
3853 
3854   // Compare vector to immediate.
3855   void cmphs(const PRegisterWithLaneSize& pd,
3856              const PRegisterZ& pg,
3857              const ZRegister& zn,
3858              unsigned imm7);
3859 
3860   // Compare vector to 64-bit wide elements.
3861   void cmple(const PRegisterWithLaneSize& pd,
3862              const PRegisterZ& pg,
3863              const ZRegister& zn,
3864              const ZRegister& zm);
3865 
3866   // Compare vector to immediate.
3867   void cmple(const PRegisterWithLaneSize& pd,
3868              const PRegisterZ& pg,
3869              const ZRegister& zn,
3870              int imm5);
3871 
3872   // Compare vector to 64-bit wide elements.
3873   void cmplo(const PRegisterWithLaneSize& pd,
3874              const PRegisterZ& pg,
3875              const ZRegister& zn,
3876              const ZRegister& zm);
3877 
3878   // Compare vector to immediate.
3879   void cmplo(const PRegisterWithLaneSize& pd,
3880              const PRegisterZ& pg,
3881              const ZRegister& zn,
3882              unsigned imm7);
3883 
3884   // Compare vector to 64-bit wide elements.
3885   void cmpls(const PRegisterWithLaneSize& pd,
3886              const PRegisterZ& pg,
3887              const ZRegister& zn,
3888              const ZRegister& zm);
3889 
3890   // Compare vector to immediate.
3891   void cmpls(const PRegisterWithLaneSize& pd,
3892              const PRegisterZ& pg,
3893              const ZRegister& zn,
3894              unsigned imm7);
3895 
3896   // Compare vector to 64-bit wide elements.
3897   void cmplt(const PRegisterWithLaneSize& pd,
3898              const PRegisterZ& pg,
3899              const ZRegister& zn,
3900              const ZRegister& zm);
3901 
3902   // Compare vector to immediate.
3903   void cmplt(const PRegisterWithLaneSize& pd,
3904              const PRegisterZ& pg,
3905              const ZRegister& zn,
3906              int imm5);
3907 
3908   // Compare vector to 64-bit wide elements.
3909   void cmpne(const PRegisterWithLaneSize& pd,
3910              const PRegisterZ& pg,
3911              const ZRegister& zn,
3912              const ZRegister& zm);
3913 
3914   // Compare vector to immediate.
3915   void cmpne(const PRegisterWithLaneSize& pd,
3916              const PRegisterZ& pg,
3917              const ZRegister& zn,
3918              int imm5);
3919 
3920   // Logically invert boolean condition in vector (predicated).
3921   void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3922 
3923   // Count non-zero bits (predicated).
3924   void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3925 
3926   // Set scalar to multiple of predicate constraint element count.
3927   void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3928 
3929   // Set scalar to multiple of predicate constraint element count.
3930   void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3931 
3932   // Set scalar to multiple of predicate constraint element count.
3933   void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3934 
3935   // Set scalar to active predicate element count.
3936   void cntp(const Register& xd,
3937             const PRegister& pg,
3938             const PRegisterWithLaneSize& pn);
3939 
3940   // Set scalar to multiple of predicate constraint element count.
3941   void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3942 
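  // Illustrative sketch (assuming an initialized Assembler `masm`); with the
  // default SVE_ALL pattern, these return the full element count:
  //
  //   masm.cntd(x0);              // x0 = number of D lanes in a Z register.
  //   masm.cnth(x1, SVE_ALL, 4);  // x1 = 4 * (number of H lanes).
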
3943   // Pack active vector elements into the lowest-numbered lanes; zero the rest.
3944   void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
3945 
3946   // Copy signed integer immediate to vector elements (predicated).
3947   void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
3948 
3949   // Copy general-purpose register to vector elements (predicated).
3950   void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
3951 
3952   // Copy SIMD&FP scalar register to vector elements (predicated).
3953   void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
3954 
3955   // Compare and terminate loop.
3956   void ctermeq(const Register& rn, const Register& rm);
3957 
3958   // Compare and terminate loop.
3959   void ctermne(const Register& rn, const Register& rm);
3960 
3961   // Decrement scalar by multiple of predicate constraint element count.
3962   void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3963 
3964   // Decrement scalar by multiple of predicate constraint element count.
3965   void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3966 
3967   // Decrement vector by multiple of predicate constraint element count.
3968   void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
3969 
3970   // Decrement scalar by multiple of predicate constraint element count.
3971   void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3972 
3973   // Decrement vector by multiple of predicate constraint element count.
3974   void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
3975 
3976   // Decrement scalar by active predicate element count.
3977   void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
3978 
3979   // Decrement vector by active predicate element count.
3980   void decp(const ZRegister& zdn, const PRegister& pg);
3981 
3982   // Decrement scalar by multiple of predicate constraint element count.
3983   void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3984 
3985   // Decrement vector by multiple of predicate constraint element count.
3986   void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
3987 
3988   // Broadcast general-purpose register to vector elements (unpredicated).
3989   void dup(const ZRegister& zd, const Register& xn);
3990 
3991   // Broadcast indexed element to vector (unpredicated).
3992   void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
3993 
3994   // As with movz/movk/movn, if the default shift of -1 is passed to dup,
3995   // the assembler picks an immediate and left shift that together encode
3996   // the requested value. If an explicit left shift is specified (0 or 8),
3997   // the immediate must be a signed 8-bit integer. (Sketched below.)
3998 
3999   // Broadcast signed immediate to vector elements (unpredicated).
4000   void dup(const ZRegister& zd, int imm8, int shift = -1);
4001 
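  // A minimal sketch of the immediate form (illustrative only, assuming an
  // initialized Assembler `masm`):
  //
  //   masm.dup(z0.VnH(), 0x1100);  // Picked encoding: imm8 = 0x11, shift = 8.
  //   masm.dup(z1.VnB(), -42);     // Picked encoding: imm8 = -42, shift = 0.
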
4002   // Broadcast logical bitmask immediate to vector (unpredicated).
4003   void dupm(const ZRegister& zd, uint64_t imm);
4004 
4005   // Bitwise exclusive OR with inverted immediate (unpredicated).
4006   void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4007 
4008   // Bitwise exclusive OR predicates.
4009   void eor(const PRegisterWithLaneSize& pd,
4010            const PRegisterZ& pg,
4011            const PRegisterWithLaneSize& pn,
4012            const PRegisterWithLaneSize& pm);
4013 
4014   // Bitwise exclusive OR vectors (predicated).
4015   void eor(const ZRegister& zd,
4016            const PRegisterM& pg,
4017            const ZRegister& zn,
4018            const ZRegister& zm);
4019 
4020   // Bitwise exclusive OR with immediate (unpredicated).
4021   void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4022 
4023   // Bitwise exclusive OR vectors (unpredicated).
4024   void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4025 
4026   // Bitwise exclusive OR predicates.
4027   void eors(const PRegisterWithLaneSize& pd,
4028             const PRegisterZ& pg,
4029             const PRegisterWithLaneSize& pn,
4030             const PRegisterWithLaneSize& pm);
4031 
4032   // Bitwise XOR reduction to scalar.
4033   void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4034 
4035   // Extract vector from pair of vectors.
4036   void ext(const ZRegister& zd,
4037            const ZRegister& zn,
4038            const ZRegister& zm,
4039            unsigned offset);
4040 
4041   // Floating-point absolute difference (predicated).
4042   void fabd(const ZRegister& zd,
4043             const PRegisterM& pg,
4044             const ZRegister& zn,
4045             const ZRegister& zm);
4046 
4047   // Floating-point absolute value (predicated).
4048   void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4049 
4050   // Floating-point absolute compare vectors.
4051   void facge(const PRegisterWithLaneSize& pd,
4052              const PRegisterZ& pg,
4053              const ZRegister& zn,
4054              const ZRegister& zm);
4055 
4056   // Floating-point absolute compare vectors.
4057   void facgt(const PRegisterWithLaneSize& pd,
4058              const PRegisterZ& pg,
4059              const ZRegister& zn,
4060              const ZRegister& zm);
4061 
4062   // Floating-point add immediate (predicated).
4063   void fadd(const ZRegister& zd,
4064             const PRegisterM& pg,
4065             const ZRegister& zn,
4066             double imm);
4067 
4068   // Floating-point add vector (predicated).
4069   void fadd(const ZRegister& zd,
4070             const PRegisterM& pg,
4071             const ZRegister& zn,
4072             const ZRegister& zm);
4073 
4074   // Floating-point add vector (unpredicated).
4075   void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4076 
4077   // Floating-point add strictly-ordered reduction, accumulating in scalar.
4078   void fadda(const VRegister& vd,
4079              const PRegister& pg,
4080              const VRegister& vn,
4081              const ZRegister& zm);
4082 
4083   // Floating-point add recursive reduction to scalar.
4084   void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4085 
4086   // Floating-point complex add with rotate (predicated).
4087   void fcadd(const ZRegister& zd,
4088              const PRegisterM& pg,
4089              const ZRegister& zn,
4090              const ZRegister& zm,
4091              int rot);
4092 
4093   // Floating-point compare vector with zero.
4094   void fcmeq(const PRegisterWithLaneSize& pd,
4095              const PRegisterZ& pg,
4096              const ZRegister& zn,
4097              double zero);
4098 
4099   // Floating-point compare vectors.
4100   void fcmeq(const PRegisterWithLaneSize& pd,
4101              const PRegisterZ& pg,
4102              const ZRegister& zn,
4103              const ZRegister& zm);
4104 
4105   // Floating-point compare vector with zero.
4106   void fcmge(const PRegisterWithLaneSize& pd,
4107              const PRegisterZ& pg,
4108              const ZRegister& zn,
4109              double zero);
4110 
4111   // Floating-point compare vectors.
4112   void fcmge(const PRegisterWithLaneSize& pd,
4113              const PRegisterZ& pg,
4114              const ZRegister& zn,
4115              const ZRegister& zm);
4116 
4117   // Floating-point compare vector with zero.
4118   void fcmgt(const PRegisterWithLaneSize& pd,
4119              const PRegisterZ& pg,
4120              const ZRegister& zn,
4121              double zero);
4122 
4123   // Floating-point compare vectors.
4124   void fcmgt(const PRegisterWithLaneSize& pd,
4125              const PRegisterZ& pg,
4126              const ZRegister& zn,
4127              const ZRegister& zm);
4128 
4129   // Floating-point complex multiply-add with rotate (predicated).
4130   void fcmla(const ZRegister& zda,
4131              const PRegisterM& pg,
4132              const ZRegister& zn,
4133              const ZRegister& zm,
4134              int rot);
4135 
4136   // Floating-point complex multiply-add by indexed values with rotate.
4137   void fcmla(const ZRegister& zda,
4138              const ZRegister& zn,
4139              const ZRegister& zm,
4140              int index,
4141              int rot);
4142 
4143   // Floating-point compare vector with zero (less than or equal).
4144   void fcmle(const PRegisterWithLaneSize& pd,
4145              const PRegisterZ& pg,
4146              const ZRegister& zn,
4147              double zero);
4148 
4149   // Floating-point compare vector with zero (less than).
4150   void fcmlt(const PRegisterWithLaneSize& pd,
4151              const PRegisterZ& pg,
4152              const ZRegister& zn,
4153              double zero);
4154 
4155   // Floating-point compare vector with zero (not equal).
4156   void fcmne(const PRegisterWithLaneSize& pd,
4157              const PRegisterZ& pg,
4158              const ZRegister& zn,
4159              double zero);
4160 
4161   // Floating-point compare vectors (not equal).
4162   void fcmne(const PRegisterWithLaneSize& pd,
4163              const PRegisterZ& pg,
4164              const ZRegister& zn,
4165              const ZRegister& zm);
4166 
4167   // Floating-point compare vectors (unordered).
4168   void fcmuo(const PRegisterWithLaneSize& pd,
4169              const PRegisterZ& pg,
4170              const ZRegister& zn,
4171              const ZRegister& zm);
4172 
4173   // Copy floating-point immediate to vector elements (predicated).
4174   void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4175 
4176   // Copy half-precision floating-point immediate to vector elements
4177   // (predicated).
4178   void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4179     fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4180   }
4181 
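  // Usage sketch (illustrative comment, not part of the interface): assuming
  // an `Assembler masm`, the usual register aliases, and that Float16 is
  // constructible from a double (as the FPToDouble call above suggests), the
  // Float16 overload just widens the immediate and defers to the double form.
  //
  //   masm.fcpy(z0.VnH(), p1.Merging(), Float16(1.0));  // Active .h lanes = 1.0.
  //   masm.fcpy(z0.VnD(), p1.Merging(), -2.0);          // Active .d lanes = -2.0.
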
4182   // Floating-point convert precision (predicated).
4183   void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4184 
4185   // Floating-point convert to signed integer, rounding toward zero
4186   // (predicated).
4187   void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4188 
4189   // Floating-point convert to unsigned integer, rounding toward zero
4190   // (predicated).
4191   void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4192 
4193   // Floating-point divide by vector (predicated).
4194   void fdiv(const ZRegister& zd,
4195             const PRegisterM& pg,
4196             const ZRegister& zn,
4197             const ZRegister& zm);
4198 
4199   // Floating-point reversed divide by vector (predicated).
4200   void fdivr(const ZRegister& zd,
4201              const PRegisterM& pg,
4202              const ZRegister& zn,
4203              const ZRegister& zm);
4204 
4205   // Broadcast floating-point immediate to vector elements.
4206   void fdup(const ZRegister& zd, double imm);
4207 
4208   // Broadcast half-precision floating-point immediate to vector elements.
4209   void fdup(const ZRegister& zd, Float16 imm) {
4210     fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4211   }
4212 
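  // For example (a sketch, assuming an `Assembler masm` and an immediate that
  // is encodable as an 8-bit floating-point immediate):
  //
  //   masm.fdup(z3.VnS(), 1.5);  // Every .s lane of z3 becomes 1.5.
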
4213   // Floating-point exponential accelerator.
4214   void fexpa(const ZRegister& zd, const ZRegister& zn);
4215 
4216   // Floating-point fused multiply-add vectors (predicated), writing
4217   // multiplicand [Zdn = Za + Zdn * Zm].
4218   void fmad(const ZRegister& zdn,
4219             const PRegisterM& pg,
4220             const ZRegister& zm,
4221             const ZRegister& za);
4222 
4223   // Floating-point maximum with immediate (predicated).
4224   void fmax(const ZRegister& zd,
4225             const PRegisterM& pg,
4226             const ZRegister& zn,
4227             double imm);
4228 
4229   // Floating-point maximum (predicated).
4230   void fmax(const ZRegister& zd,
4231             const PRegisterM& pg,
4232             const ZRegister& zn,
4233             const ZRegister& zm);
4234 
4235   // Floating-point maximum number with immediate (predicated).
4236   void fmaxnm(const ZRegister& zd,
4237               const PRegisterM& pg,
4238               const ZRegister& zn,
4239               double imm);
4240 
4241   // Floating-point maximum number (predicated).
4242   void fmaxnm(const ZRegister& zd,
4243               const PRegisterM& pg,
4244               const ZRegister& zn,
4245               const ZRegister& zm);
4246 
4247   // Floating-point maximum number recursive reduction to scalar.
4248   void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4249 
4250   // Floating-point maximum recursive reduction to scalar.
4251   void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4252 
4253   // Floating-point minimum with immediate (predicated).
4254   void fmin(const ZRegister& zd,
4255             const PRegisterM& pg,
4256             const ZRegister& zn,
4257             double imm);
4258 
4259   // Floating-point minimum (predicated).
4260   void fmin(const ZRegister& zd,
4261             const PRegisterM& pg,
4262             const ZRegister& zn,
4263             const ZRegister& zm);
4264 
4265   // Floating-point minimum number with immediate (predicated).
4266   void fminnm(const ZRegister& zd,
4267               const PRegisterM& pg,
4268               const ZRegister& zn,
4269               double imm);
4270 
4271   // Floating-point minimum number (predicated).
4272   void fminnm(const ZRegister& zd,
4273               const PRegisterM& pg,
4274               const ZRegister& zn,
4275               const ZRegister& zm);
4276 
4277   // Floating-point minimum number recursive reduction to scalar.
4278   void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4279 
4280   // Floating-point minimum recursive reduction to scalar.
4281   void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4282 
4283   // Floating-point fused multiply-add vectors (predicated), writing addend
4284   // [Zda = Zda + Zn * Zm].
4285   void fmla(const ZRegister& zda,
4286             const PRegisterM& pg,
4287             const ZRegister& zn,
4288             const ZRegister& zm);
4289 
4290   // Floating-point fused multiply-add by indexed elements
4291   // (Zda = Zda + Zn * Zm[indexed]).
4292   void fmla(const ZRegister& zda,
4293             const ZRegister& zn,
4294             const ZRegister& zm,
4295             int index);
4296 
4297   // Floating-point fused multiply-subtract vectors (predicated), writing
4298   // addend [Zda = Zda + -Zn * Zm].
4299   void fmls(const ZRegister& zda,
4300             const PRegisterM& pg,
4301             const ZRegister& zn,
4302             const ZRegister& zm);
4303 
4304   // Floating-point fused multiply-subtract by indexed elements
4305   // (Zda = Zda + -Zn * Zm[indexed]).
4306   void fmls(const ZRegister& zda,
4307             const ZRegister& zn,
4308             const ZRegister& zm,
4309             int index);
4310 
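  // The fused multiply-add forms differ only in which operand is overwritten.
  // A sketch (assuming an `Assembler masm`):
  //
  //   // zda = zda + zn * zm (addend-writing):
  //   masm.fmla(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());
  //   // zdn = za + zdn * zm (multiplicand-writing; note the operand order):
  //   masm.fmad(z0.VnS(), p0.Merging(), z2.VnS(), z1.VnS());
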
4311   // Move 8-bit floating-point immediate to vector elements (unpredicated).
4312   void fmov(const ZRegister& zd, double imm);
4313 
4314   // Move 8-bit floating-point immediate to vector elements (predicated).
4315   void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4316 
4317   // Floating-point fused multiply-subtract vectors (predicated), writing
4318   // multiplicand [Zdn = Za + -Zdn * Zm].
4319   void fmsb(const ZRegister& zdn,
4320             const PRegisterM& pg,
4321             const ZRegister& zm,
4322             const ZRegister& za);
4323 
4324   // Floating-point multiply by immediate (predicated).
4325   void fmul(const ZRegister& zd,
4326             const PRegisterM& pg,
4327             const ZRegister& zn,
4328             double imm);
4329 
4330   // Floating-point multiply vectors (predicated).
4331   void fmul(const ZRegister& zd,
4332             const PRegisterM& pg,
4333             const ZRegister& zn,
4334             const ZRegister& zm);
4335 
4336   // Floating-point multiply by indexed elements.
4337   void fmul(const ZRegister& zd,
4338             const ZRegister& zn,
4339             const ZRegister& zm,
4340             unsigned index);
4341 
4342   // Floating-point multiply vectors (unpredicated).
4343   void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4344 
4345   // Floating-point multiply-extended vectors (predicated).
4346   void fmulx(const ZRegister& zd,
4347              const PRegisterM& pg,
4348              const ZRegister& zn,
4349              const ZRegister& zm);
4350 
4351   // Floating-point negate (predicated).
4352   void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4353 
4354   // Floating-point negated fused multiply-add vectors (predicated), writing
4355   // multiplicand [Zdn = -Za + -Zdn * Zm].
4356   void fnmad(const ZRegister& zdn,
4357              const PRegisterM& pg,
4358              const ZRegister& zm,
4359              const ZRegister& za);
4360 
4361   // Floating-point negated fused multiply-add vectors (predicated), writing
4362   // addend [Zda = -Zda + -Zn * Zm].
4363   void fnmla(const ZRegister& zda,
4364              const PRegisterM& pg,
4365              const ZRegister& zn,
4366              const ZRegister& zm);
4367 
4368   // Floating-point negated fused multiply-subtract vectors (predicated),
4369   // writing addend [Zda = -Zda + Zn * Zm].
4370   void fnmls(const ZRegister& zda,
4371              const PRegisterM& pg,
4372              const ZRegister& zn,
4373              const ZRegister& zm);
4374 
4375   // Floating-point negated fused multiply-subtract vectors (predicated),
4376   // writing multiplicand [Zdn = -Za + Zdn * Zm].
4377   void fnmsb(const ZRegister& zdn,
4378              const PRegisterM& pg,
4379              const ZRegister& zm,
4380              const ZRegister& za);
4381 
4382   // Floating-point reciprocal estimate (unpredicated).
4383   void frecpe(const ZRegister& zd, const ZRegister& zn);
4384 
4385   // Floating-point reciprocal step (unpredicated).
4386   void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4387 
4388   // Floating-point reciprocal exponent (predicated).
4389   void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4390 
4391   // Floating-point round to integral value, to nearest with ties to away (predicated).
4392   void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4393 
4394   // Floating-point round to integral value, using the current rounding mode (predicated).
4395   void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4396 
4397   // Floating-point round to integral value, toward minus infinity (predicated).
4398   void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4399 
4400   // Floating-point round to integral value, to nearest with ties to even (predicated).
4401   void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4402 
4403   // Floating-point round to integral value, toward plus infinity (predicated).
4404   void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4405 
4406   // Floating-point round to integral value, current mode, signalling inexact (predicated).
4407   void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4408 
4409   // Floating-point round to integral value, toward zero (predicated).
4410   void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4411 
4412   // Floating-point reciprocal square root estimate (unpredicated).
4413   void frsqrte(const ZRegister& zd, const ZRegister& zn);
4414 
4415   // Floating-point reciprocal square root step (unpredicated).
4416   void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4417 
4418   // Floating-point adjust exponent by vector (predicated).
4419   void fscale(const ZRegister& zd,
4420               const PRegisterM& pg,
4421               const ZRegister& zn,
4422               const ZRegister& zm);
4423 
4424   // Floating-point square root (predicated).
4425   void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4426 
4427   // Floating-point subtract immediate (predicated).
4428   void fsub(const ZRegister& zd,
4429             const PRegisterM& pg,
4430             const ZRegister& zn,
4431             double imm);
4432 
4433   // Floating-point subtract vectors (predicated).
4434   void fsub(const ZRegister& zd,
4435             const PRegisterM& pg,
4436             const ZRegister& zn,
4437             const ZRegister& zm);
4438 
4439   // Floating-point subtract vectors (unpredicated).
4440   void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4441 
4442   // Floating-point reversed subtract from immediate (predicated).
4443   void fsubr(const ZRegister& zd,
4444              const PRegisterM& pg,
4445              const ZRegister& zn,
4446              double imm);
4447 
4448   // Floating-point reversed subtract vectors (predicated).
4449   void fsubr(const ZRegister& zd,
4450              const PRegisterM& pg,
4451              const ZRegister& zn,
4452              const ZRegister& zm);
4453 
4454   // Floating-point trigonometric multiply-add coefficient.
4455   void ftmad(const ZRegister& zd,
4456              const ZRegister& zn,
4457              const ZRegister& zm,
4458              int imm3);
4459 
4460   // Floating-point trigonometric starting value.
4461   void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4462 
4463   // Floating-point trigonometric select coefficient.
4464   void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4465 
4466   // Increment scalar by multiple of 8-bit predicate constraint element count.
4467   void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4468 
4469   // Increment scalar by multiple of 64-bit predicate constraint element count.
4470   void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4471 
4472   // Increment vector by multiple of 64-bit predicate constraint element count.
4473   void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4474 
4475   // Increment scalar by multiple of 16-bit predicate constraint element count.
4476   void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4477 
4478   // Increment vector by multiple of 16-bit predicate constraint element count.
4479   void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4480 
4481   // Increment scalar by active predicate element count.
4482   void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4483 
4484   // Increment vector by active predicate element count.
4485   void incp(const ZRegister& zdn, const PRegister& pg);
4486 
4487   // Increment scalar by multiple of 32-bit predicate constraint element count.
4488   void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4489 
4490   // Increment vector by multiple of 32-bit predicate constraint element count.
4491   void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4492 
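  // Element counts scale with the vector length. A sketch (assuming an
  // `Assembler masm`; SVE_POW2 is one of the named constraints, like SVE_ALL):
  //
  //   masm.incw(x0, SVE_ALL, 2);      // x0 += 2 * (number of .w elements).
  //   masm.inch(z1.VnH(), SVE_POW2);  // Add the constrained .h count to each lane.
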
4493   // Create index starting from and incremented by immediate.
4494   void index(const ZRegister& zd, int start, int step);
4495 
4496   // Create index starting from and incremented by general-purpose register.
4497   void index(const ZRegister& zd, const Register& rn, const Register& rm);
4498 
4499   // Create index starting from general-purpose register and incremented by
4500   // immediate.
4501   void index(const ZRegister& zd, const Register& rn, int imm5);
4502 
4503   // Create index starting from immediate and incremented by general-purpose
4504   // register.
4505   void index(const ZRegister& zd, int imm5, const Register& rm);
4506 
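  // For example (a sketch, assuming an `Assembler masm`):
  //
  //   masm.index(z0.VnB(), 0, 1);    // z0.b = {0, 1, 2, 3, ...}
  //   masm.index(z1.VnS(), w2, -1);  // z1.s = {w2, w2 - 1, w2 - 2, ...}
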
4507   // Insert general-purpose register in shifted vector.
4508   void insr(const ZRegister& zdn, const Register& rm);
4509 
4510   // Insert SIMD&FP scalar register in shifted vector.
4511   void insr(const ZRegister& zdn, const VRegister& vm);
4512 
4513   // Extract element after last to general-purpose register.
4514   void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4515 
4516   // Extract element after last to SIMD&FP scalar register.
4517   void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4518 
4519   // Extract last element to general-purpose register.
4520   void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4521 
4522   // Extract last element to SIMD&FP scalar register.
4523   void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4524 
4525   // Contiguous/gather load bytes to vector.
4526   void ld1b(const ZRegister& zt,
4527             const PRegisterZ& pg,
4528             const SVEMemOperand& addr);
4529 
4530   // Contiguous/gather load halfwords to vector.
4531   void ld1h(const ZRegister& zt,
4532             const PRegisterZ& pg,
4533             const SVEMemOperand& addr);
4534 
4535   // Contiguous/gather load words to vector.
4536   void ld1w(const ZRegister& zt,
4537             const PRegisterZ& pg,
4538             const SVEMemOperand& addr);
4539 
4540   // Contiguous/gather load doublewords to vector.
4541   void ld1d(const ZRegister& zt,
4542             const PRegisterZ& pg,
4543             const SVEMemOperand& addr);
4544 
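  // A load sketch (assuming an `Assembler masm`; SVE_MUL_VL scales an
  // immediate offset by the vector length in the SVEMemOperand forms):
  //
  //   masm.ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0));
  //   masm.ld1b(z1.VnH(), p0.Zeroing(), SVEMemOperand(x0, 1, SVE_MUL_VL));
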
4545   // TODO: Merge other loads into the SVEMemOperand versions.
4546 
4547   // Load and broadcast unsigned byte to vector.
4548   void ld1rb(const ZRegister& zt,
4549              const PRegisterZ& pg,
4550              const SVEMemOperand& addr);
4551 
4552   // Load and broadcast unsigned halfword to vector.
4553   void ld1rh(const ZRegister& zt,
4554              const PRegisterZ& pg,
4555              const SVEMemOperand& addr);
4556 
4557   // Load and broadcast unsigned word to vector.
4558   void ld1rw(const ZRegister& zt,
4559              const PRegisterZ& pg,
4560              const SVEMemOperand& addr);
4561 
4562   // Load and broadcast doubleword to vector.
4563   void ld1rd(const ZRegister& zt,
4564              const PRegisterZ& pg,
4565              const SVEMemOperand& addr);
4566 
4567   // Contiguous load and replicate sixteen bytes.
4568   void ld1rqb(const ZRegister& zt,
4569               const PRegisterZ& pg,
4570               const SVEMemOperand& addr);
4571 
4572   // Contiguous load and replicate eight halfwords.
4573   void ld1rqh(const ZRegister& zt,
4574               const PRegisterZ& pg,
4575               const SVEMemOperand& addr);
4576 
4577   // Contiguous load and replicate four words.
4578   void ld1rqw(const ZRegister& zt,
4579               const PRegisterZ& pg,
4580               const SVEMemOperand& addr);
4581 
4582   // Contiguous load and replicate two doublewords.
4583   void ld1rqd(const ZRegister& zt,
4584               const PRegisterZ& pg,
4585               const SVEMemOperand& addr);
4586 
4587   // Load and broadcast signed byte to vector.
4588   void ld1rsb(const ZRegister& zt,
4589               const PRegisterZ& pg,
4590               const SVEMemOperand& addr);
4591 
4592   // Load and broadcast signed halfword to vector.
4593   void ld1rsh(const ZRegister& zt,
4594               const PRegisterZ& pg,
4595               const SVEMemOperand& addr);
4596 
4597   // Load and broadcast signed word to vector.
4598   void ld1rsw(const ZRegister& zt,
4599               const PRegisterZ& pg,
4600               const SVEMemOperand& addr);
4601 
4602   // Contiguous/gather load signed bytes to vector.
4603   void ld1sb(const ZRegister& zt,
4604              const PRegisterZ& pg,
4605              const SVEMemOperand& addr);
4606 
4607   // Contiguous/gather load signed halfwords to vector.
4608   void ld1sh(const ZRegister& zt,
4609              const PRegisterZ& pg,
4610              const SVEMemOperand& addr);
4611 
4612   // Contiguous/gather load signed words to vector.
4613   void ld1sw(const ZRegister& zt,
4614              const PRegisterZ& pg,
4615              const SVEMemOperand& addr);
4616 
4617   // TODO: Merge other loads into the SVEMemOperand versions.
4618 
4619   // Contiguous load two-byte structures to two vectors.
4620   void ld2b(const ZRegister& zt1,
4621             const ZRegister& zt2,
4622             const PRegisterZ& pg,
4623             const SVEMemOperand& addr);
4624 
4625   // Contiguous load two-halfword structures to two vectors.
4626   void ld2h(const ZRegister& zt1,
4627             const ZRegister& zt2,
4628             const PRegisterZ& pg,
4629             const SVEMemOperand& addr);
4630 
4631   // Contiguous load two-word structures to two vectors.
4632   void ld2w(const ZRegister& zt1,
4633             const ZRegister& zt2,
4634             const PRegisterZ& pg,
4635             const SVEMemOperand& addr);
4636 
4637   // Contiguous load two-doubleword structures to two vectors.
4638   void ld2d(const ZRegister& zt1,
4639             const ZRegister& zt2,
4640             const PRegisterZ& pg,
4641             const SVEMemOperand& addr);
4642 
4643   // Contiguous load three-byte structures to three vectors.
4644   void ld3b(const ZRegister& zt1,
4645             const ZRegister& zt2,
4646             const ZRegister& zt3,
4647             const PRegisterZ& pg,
4648             const SVEMemOperand& addr);
4649 
4650   // Contiguous load three-halfword structures to three vectors.
4651   void ld3h(const ZRegister& zt1,
4652             const ZRegister& zt2,
4653             const ZRegister& zt3,
4654             const PRegisterZ& pg,
4655             const SVEMemOperand& addr);
4656 
4657   // Contiguous load three-word structures to three vectors.
4658   void ld3w(const ZRegister& zt1,
4659             const ZRegister& zt2,
4660             const ZRegister& zt3,
4661             const PRegisterZ& pg,
4662             const SVEMemOperand& addr);
4663 
4664   // Contiguous load three-doubleword structures to three vectors.
4665   void ld3d(const ZRegister& zt1,
4666             const ZRegister& zt2,
4667             const ZRegister& zt3,
4668             const PRegisterZ& pg,
4669             const SVEMemOperand& addr);
4670 
4671   // Contiguous load four-byte structures to four vectors.
4672   void ld4b(const ZRegister& zt1,
4673             const ZRegister& zt2,
4674             const ZRegister& zt3,
4675             const ZRegister& zt4,
4676             const PRegisterZ& pg,
4677             const SVEMemOperand& addr);
4678 
4679   // Contiguous load four-halfword structures to four vectors.
4680   void ld4h(const ZRegister& zt1,
4681             const ZRegister& zt2,
4682             const ZRegister& zt3,
4683             const ZRegister& zt4,
4684             const PRegisterZ& pg,
4685             const SVEMemOperand& addr);
4686 
4687   // Contiguous load four-word structures to four vectors.
4688   void ld4w(const ZRegister& zt1,
4689             const ZRegister& zt2,
4690             const ZRegister& zt3,
4691             const ZRegister& zt4,
4692             const PRegisterZ& pg,
4693             const SVEMemOperand& addr);
4694 
4695   // Contiguous load four-doubleword structures to four vectors.
4696   void ld4d(const ZRegister& zt1,
4697             const ZRegister& zt2,
4698             const ZRegister& zt3,
4699             const ZRegister& zt4,
4700             const PRegisterZ& pg,
4701             const SVEMemOperand& addr);
4702 
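  // Structure loads de-interleave: a sketch (assuming an `Assembler masm`;
  // the destination Z registers are expected to be consecutively numbered):
  //
  //   // {x, y} pairs at [x0] are split so that z0.s holds the x values and
  //   // z1.s holds the y values:
  //   masm.ld2w(z0.VnS(), z1.VnS(), p0.Zeroing(), SVEMemOperand(x0));
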
4703   // Contiguous load first-fault unsigned bytes to vector.
4704   void ldff1b(const ZRegister& zt,
4705               const PRegisterZ& pg,
4706               const SVEMemOperand& addr);
4707 
4708   // Contiguous load first-fault unsigned halfwords to vector.
4709   void ldff1h(const ZRegister& zt,
4710               const PRegisterZ& pg,
4711               const SVEMemOperand& addr);
4712 
4713   // Contiguous load first-fault unsigned words to vector.
4714   void ldff1w(const ZRegister& zt,
4715               const PRegisterZ& pg,
4716               const SVEMemOperand& addr);
4717 
4718   // Contiguous load first-fault doublewords to vector.
4719   void ldff1d(const ZRegister& zt,
4720               const PRegisterZ& pg,
4721               const SVEMemOperand& addr);
4722 
4723   // Contiguous load first-fault signed bytes to vector.
4724   void ldff1sb(const ZRegister& zt,
4725                const PRegisterZ& pg,
4726                const SVEMemOperand& addr);
4727 
4728   // Contiguous load first-fault signed halfwords to vector.
4729   void ldff1sh(const ZRegister& zt,
4730                const PRegisterZ& pg,
4731                const SVEMemOperand& addr);
4732 
4733   // Contiguous load first-fault signed words to vector.
4734   void ldff1sw(const ZRegister& zt,
4735                const PRegisterZ& pg,
4736                const SVEMemOperand& addr);
4737 
4738   // Gather load first-fault unsigned bytes to vector.
4739   void ldff1b(const ZRegister& zt,
4740               const PRegisterZ& pg,
4741               const Register& xn,
4742               const ZRegister& zm);
4743 
4744   // Gather load first-fault unsigned bytes to vector (immediate index).
4745   void ldff1b(const ZRegister& zt,
4746               const PRegisterZ& pg,
4747               const ZRegister& zn,
4748               int imm5);
4749 
4750   // Gather load first-fault doublewords to vector (vector index).
4751   void ldff1d(const ZRegister& zt,
4752               const PRegisterZ& pg,
4753               const Register& xn,
4754               const ZRegister& zm);
4755 
4756   // Gather load first-fault doublewords to vector (immediate index).
4757   void ldff1d(const ZRegister& zt,
4758               const PRegisterZ& pg,
4759               const ZRegister& zn,
4760               int imm5);
4761 
4762   // Gather load first-fault unsigned halfwords to vector (vector index).
4763   void ldff1h(const ZRegister& zt,
4764               const PRegisterZ& pg,
4765               const Register& xn,
4766               const ZRegister& zm);
4767 
4768   // Gather load first-fault unsigned halfwords to vector (immediate index).
4769   void ldff1h(const ZRegister& zt,
4770               const PRegisterZ& pg,
4771               const ZRegister& zn,
4772               int imm5);
4773 
4774   // Gather load first-fault signed bytes to vector (vector index).
4775   void ldff1sb(const ZRegister& zt,
4776                const PRegisterZ& pg,
4777                const Register& xn,
4778                const ZRegister& zm);
4779 
4780   // Gather load first-fault signed bytes to vector (immediate index).
4781   void ldff1sb(const ZRegister& zt,
4782                const PRegisterZ& pg,
4783                const ZRegister& zn,
4784                int imm5);
4785 
4786   // Gather load first-fault signed halfwords to vector (vector index).
4787   void ldff1sh(const ZRegister& zt,
4788                const PRegisterZ& pg,
4789                const Register& xn,
4790                const ZRegister& zm);
4791 
4792   // Gather load first-fault signed halfwords to vector (immediate index).
4793   void ldff1sh(const ZRegister& zt,
4794                const PRegisterZ& pg,
4795                const ZRegister& zn,
4796                int imm5);
4797 
4798   // Gather load first-fault signed words to vector (vector index).
4799   void ldff1sw(const ZRegister& zt,
4800                const PRegisterZ& pg,
4801                const Register& xn,
4802                const ZRegister& zm);
4803 
4804   // Gather load first-fault signed words to vector (immediate index).
4805   void ldff1sw(const ZRegister& zt,
4806                const PRegisterZ& pg,
4807                const ZRegister& zn,
4808                int imm5);
4809 
4810   // Gather load first-fault unsigned words to vector (vector index).
4811   void ldff1w(const ZRegister& zt,
4812               const PRegisterZ& pg,
4813               const Register& xn,
4814               const ZRegister& zm);
4815 
4816   // Gather load first-fault unsigned words to vector (immediate index).
4817   void ldff1w(const ZRegister& zt,
4818               const PRegisterZ& pg,
4819               const ZRegister& zn,
4820               int imm5);
4821 
4822   // Contiguous load non-fault unsigned bytes to vector (immediate index).
4823   void ldnf1b(const ZRegister& zt,
4824               const PRegisterZ& pg,
4825               const SVEMemOperand& addr);
4826 
4827   // Contiguous load non-fault doublewords to vector (immediate index).
4828   void ldnf1d(const ZRegister& zt,
4829               const PRegisterZ& pg,
4830               const SVEMemOperand& addr);
4831 
4832   // Contiguous load non-fault unsigned halfwords to vector (immediate
4833   // index).
4834   void ldnf1h(const ZRegister& zt,
4835               const PRegisterZ& pg,
4836               const SVEMemOperand& addr);
4837 
4838   // Contiguous load non-fault signed bytes to vector (immediate index).
4839   void ldnf1sb(const ZRegister& zt,
4840                const PRegisterZ& pg,
4841                const SVEMemOperand& addr);
4842 
4843   // Contiguous load non-fault signed halfwords to vector (immediate index).
4844   void ldnf1sh(const ZRegister& zt,
4845                const PRegisterZ& pg,
4846                const SVEMemOperand& addr);
4847 
4848   // Contiguous load non-fault signed words to vector (immediate index).
4849   void ldnf1sw(const ZRegister& zt,
4850                const PRegisterZ& pg,
4851                const SVEMemOperand& addr);
4852 
4853   // Contiguous load non-fault unsigned words to vector (immediate index).
4854   void ldnf1w(const ZRegister& zt,
4855               const PRegisterZ& pg,
4856               const SVEMemOperand& addr);
4857 
4858   // Contiguous load non-temporal bytes to vector.
4859   void ldnt1b(const ZRegister& zt,
4860               const PRegisterZ& pg,
4861               const SVEMemOperand& addr);
4862 
4863   // Contiguous load non-temporal halfwords to vector.
4864   void ldnt1h(const ZRegister& zt,
4865               const PRegisterZ& pg,
4866               const SVEMemOperand& addr);
4867 
4868   // Contiguous load non-temporal words to vector.
4869   void ldnt1w(const ZRegister& zt,
4870               const PRegisterZ& pg,
4871               const SVEMemOperand& addr);
4872 
4873   // Contiguous load non-temporal doublewords to vector.
4874   void ldnt1d(const ZRegister& zt,
4875               const PRegisterZ& pg,
4876               const SVEMemOperand& addr);
4877 
4878   // Load SVE predicate/vector register.
4879   void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4880 
4881   // Logical shift left by immediate (predicated).
4882   void lsl(const ZRegister& zd,
4883            const PRegisterM& pg,
4884            const ZRegister& zn,
4885            int shift);
4886 
4887   // Logical shift left by 64-bit wide elements (predicated).
4888   void lsl(const ZRegister& zd,
4889            const PRegisterM& pg,
4890            const ZRegister& zn,
4891            const ZRegister& zm);
4892 
4893   // Logical shift left by immediate (unpredicated).
4894   void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4895 
4896   // Logical shift left by 64-bit wide elements (unpredicated).
4897   void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4898 
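  // A shift sketch (assuming an `Assembler masm`): the "64-bit wide elements"
  // forms take per-lane shift amounts from the .d lanes of zm.
  //
  //   masm.lsl(z0.VnH(), z1.VnH(), 3);                       // Each .h lane << 3.
  //   masm.lsl(z0.VnS(), p0.Merging(), z0.VnS(), z2.VnD());  // Wide-element shift.
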
4899   // Reversed logical shift left by vector (predicated).
4900   void lslr(const ZRegister& zd,
4901             const PRegisterM& pg,
4902             const ZRegister& zn,
4903             const ZRegister& zm);
4904 
4905   // Logical shift right by immediate (predicated).
4906   void lsr(const ZRegister& zd,
4907            const PRegisterM& pg,
4908            const ZRegister& zn,
4909            int shift);
4910 
4911   // Logical shift right by 64-bit wide elements (predicated).
4912   void lsr(const ZRegister& zd,
4913            const PRegisterM& pg,
4914            const ZRegister& zn,
4915            const ZRegister& zm);
4916 
4917   // Logical shift right by immediate (unpredicated).
4918   void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4919 
4920   // Logical shift right by 64-bit wide elements (unpredicated).
4921   void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4922 
4923   // Reversed logical shift right by vector (predicated).
4924   void lsrr(const ZRegister& zd,
4925             const PRegisterM& pg,
4926             const ZRegister& zn,
4927             const ZRegister& zm);
4928 
4929   // Bitwise invert predicate.
4930   void not_(const PRegisterWithLaneSize& pd,
4931             const PRegisterZ& pg,
4932             const PRegisterWithLaneSize& pn);
4933 
4934   // Bitwise invert predicate, setting the condition flags.
4935   void nots(const PRegisterWithLaneSize& pd,
4936             const PRegisterZ& pg,
4937             const PRegisterWithLaneSize& pn);
4938 
4939   // Multiply-add vectors (predicated), writing multiplicand
4940   // [Zdn = Za + Zdn * Zm].
4941   void mad(const ZRegister& zdn,
4942            const PRegisterM& pg,
4943            const ZRegister& zm,
4944            const ZRegister& za);
4945 
4946   // Multiply-add vectors (predicated), writing addend
4947   // [Zda = Zda + Zn * Zm].
4948   void mla(const ZRegister& zda,
4949            const PRegisterM& pg,
4950            const ZRegister& zn,
4951            const ZRegister& zm);
4952 
4953   // Multiply-subtract vectors (predicated), writing addend
4954   // [Zda = Zda - Zn * Zm].
4955   void mls(const ZRegister& zda,
4956            const PRegisterM& pg,
4957            const ZRegister& zn,
4958            const ZRegister& zm);
4959 
4960   // Move predicates (unpredicated).
4961   void mov(const PRegister& pd, const PRegister& pn);
4962 
4963   // Move predicates (merging).
4964   void mov(const PRegisterWithLaneSize& pd,
4965            const PRegisterM& pg,
4966            const PRegisterWithLaneSize& pn);
4967 
4968   // Move predicates (zeroing).
4969   void mov(const PRegisterWithLaneSize& pd,
4970            const PRegisterZ& pg,
4971            const PRegisterWithLaneSize& pn);
4972 
4973   // Move general-purpose register to vector elements (unpredicated).
4974   void mov(const ZRegister& zd, const Register& xn);
4975 
4976   // Move SIMD&FP scalar register to vector elements (unpredicated).
4977   void mov(const ZRegister& zd, const VRegister& vn);
4978 
4979   // Move vector register (unpredicated).
4980   void mov(const ZRegister& zd, const ZRegister& zn);
4981 
4982   // Move indexed element to vector elements (unpredicated).
4983   void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
4984 
4985   // Move general-purpose register to vector elements (predicated).
4986   void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4987 
4988   // Move SIMD&FP scalar register to vector elements (predicated).
4989   void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4990 
4991   // Move vector elements (predicated).
4992   void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4993 
4994   // Move signed integer immediate to vector elements (predicated).
4995   void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4996 
4997   // Move signed immediate to vector elements (unpredicated).
4998   void mov(const ZRegister& zd, int imm8, int shift);
4999 
5000   // Move logical bitmask immediate to vector (unpredicated).
5001   void mov(const ZRegister& zd, uint64_t imm);
5002 
5003   // Move predicate (unpredicated), setting the condition flags.
5004   void movs(const PRegister& pd, const PRegister& pn);
5005 
5006   // Move predicates (zeroing), setting the condition flags.
5007   void movs(const PRegisterWithLaneSize& pd,
5008             const PRegisterZ& pg,
5009             const PRegisterWithLaneSize& pn);
5010 
5011   // Move prefix (predicated).
5012   void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5013 
5014   // Move prefix (unpredicated).
5015   void movprfx(const ZRegister& zd, const ZRegister& zn);
5016 
5017   // Multiply-subtract vectors (predicated), writing multiplicand
5018   // [Zdn = Za - Zdn * Zm].
5019   void msb(const ZRegister& zdn,
5020            const PRegisterM& pg,
5021            const ZRegister& zm,
5022            const ZRegister& za);
5023 
5024   // Multiply vectors (predicated).
5025   void mul(const ZRegister& zd,
5026            const PRegisterM& pg,
5027            const ZRegister& zn,
5028            const ZRegister& zm);
5029 
5030   // Multiply by immediate (unpredicated).
5031   void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5032 
5033   // Bitwise NAND predicates.
5034   void nand(const PRegisterWithLaneSize& pd,
5035             const PRegisterZ& pg,
5036             const PRegisterWithLaneSize& pn,
5037             const PRegisterWithLaneSize& pm);
5038 
5039   // Bitwise NAND predicates, setting the condition flags.
5040   void nands(const PRegisterWithLaneSize& pd,
5041              const PRegisterZ& pg,
5042              const PRegisterWithLaneSize& pn,
5043              const PRegisterWithLaneSize& pm);
5044 
5045   // Negate (predicated).
5046   void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5047 
5048   // Bitwise NOR predicates.
5049   void nor(const PRegisterWithLaneSize& pd,
5050            const PRegisterZ& pg,
5051            const PRegisterWithLaneSize& pn,
5052            const PRegisterWithLaneSize& pm);
5053 
5054   // Bitwise NOR predicates, setting the condition flags.
5055   void nors(const PRegisterWithLaneSize& pd,
5056             const PRegisterZ& pg,
5057             const PRegisterWithLaneSize& pn,
5058             const PRegisterWithLaneSize& pm);
5059 
5060   // Bitwise invert vector (predicated).
5061   void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5062 
5063   // Bitwise OR inverted predicate.
5064   void orn(const PRegisterWithLaneSize& pd,
5065            const PRegisterZ& pg,
5066            const PRegisterWithLaneSize& pn,
5067            const PRegisterWithLaneSize& pm);
5068 
5069   // Bitwise OR inverted predicate, setting the condition flags.
5070   void orns(const PRegisterWithLaneSize& pd,
5071             const PRegisterZ& pg,
5072             const PRegisterWithLaneSize& pn,
5073             const PRegisterWithLaneSize& pm);
5074 
5075   // Bitwise OR with inverted immediate (unpredicated).
5076   void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5077 
5078   // Bitwise OR predicate.
5079   void orr(const PRegisterWithLaneSize& pd,
5080            const PRegisterZ& pg,
5081            const PRegisterWithLaneSize& pn,
5082            const PRegisterWithLaneSize& pm);
5083 
5084   // Bitwise OR vectors (predicated).
5085   void orr(const ZRegister& zd,
5086            const PRegisterM& pg,
5087            const ZRegister& zn,
5088            const ZRegister& zm);
5089 
5090   // Bitwise OR with immediate (unpredicated).
5091   void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5092 
5093   // Bitwise OR vectors (unpredicated).
5094   void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5095 
5096   // Bitwise OR predicate, setting the condition flags.
5097   void orrs(const PRegisterWithLaneSize& pd,
5098             const PRegisterZ& pg,
5099             const PRegisterWithLaneSize& pn,
5100             const PRegisterWithLaneSize& pm);
5101 
5102   // Bitwise OR reduction to scalar.
5103   void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5104 
5105   // Set all predicate elements to false.
5106   void pfalse(const PRegisterWithLaneSize& pd);
5107 
5108   // Set the first active predicate element to true.
5109   void pfirst(const PRegisterWithLaneSize& pd,
5110               const PRegister& pg,
5111               const PRegisterWithLaneSize& pn);
5112 
5113   // Find next active predicate.
5114   void pnext(const PRegisterWithLaneSize& pd,
5115              const PRegister& pg,
5116              const PRegisterWithLaneSize& pn);
5117 
5118   // Prefetch bytes.
5119   void prfb(PrefetchOperation prfop,
5120             const PRegister& pg,
5121             const SVEMemOperand& addr);
5122 
5123   // Prefetch halfwords.
5124   void prfh(PrefetchOperation prfop,
5125             const PRegister& pg,
5126             const SVEMemOperand& addr);
5127 
5128   // Prefetch words.
5129   void prfw(PrefetchOperation prfop,
5130             const PRegister& pg,
5131             const SVEMemOperand& addr);
5132 
5133   // Prefetch doublewords.
5134   void prfd(PrefetchOperation prfop,
5135             const PRegister& pg,
5136             const SVEMemOperand& addr);
5137 
5138   // Set condition flags for predicate.
5139   void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5140 
5141   // Initialise predicate from named constraint.
5142   void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5143 
5144   // Initialise predicate from named constraint, setting the condition flags.
5145   void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5146 
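  // For example (a sketch, assuming an `Assembler masm`; SVE_VL4 is one of
  // the named element-count constraints):
  //
  //   masm.ptrue(p0.VnB());           // All elements true.
  //   masm.ptrue(p1.VnS(), SVE_VL4);  // First four .s elements true.
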
5147   // Unpack and widen the high half of a predicate.
5148   void punpkhi(const PRegisterWithLaneSize& pd,
5149                const PRegisterWithLaneSize& pn);
5150 
5151   // Unpack and widen the low half of a predicate.
5152   void punpklo(const PRegisterWithLaneSize& pd,
5153                const PRegisterWithLaneSize& pn);
5154 
5155   // Reverse bits (predicated).
5156   void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5157 
5158   // Read the first-fault register.
5159   void rdffr(const PRegisterWithLaneSize& pd);
5160 
5161   // Return predicate of successfully loaded elements.
5162   void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5163 
5164   // Return predicate of successfully loaded elements, setting the condition flags.
5165   void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5166 
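  // A first-fault sketch (assuming an `Assembler masm`): after an ldff1*
  // load, the FFR records which elements were loaded before any fault, so a
  // loop can retry from the first failing element.
  //
  //   masm.setffr();                                // FFR = all true.
  //   masm.ldff1b(z0.VnB(), p0.Zeroing(), SVEMemOperand(x0, x1));
  //   masm.rdffrs(p1.VnB(), p0.Zeroing());          // p1 = loaded elements.
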
5167   // Read multiple of vector register size to scalar register.
5168   void rdvl(const Register& xd, int imm6);
5169 
5170   // Reverse all elements in a predicate.
5171   void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5172 
5173   // Reverse all elements in a vector (unpredicated).
5174   void rev(const ZRegister& zd, const ZRegister& zn);
5175 
5176   // Reverse bytes within elements (predicated).
5177   void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5178 
5179   // Reverse halfwords within elements (predicated).
5180   void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5181 
5182   // Reverse words within elements (predicated).
5183   void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5184 
5185   // Signed absolute difference (predicated).
5186   void sabd(const ZRegister& zd,
5187             const PRegisterM& pg,
5188             const ZRegister& zn,
5189             const ZRegister& zm);
5190 
5191   // Signed add reduction to scalar.
5192   void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5193 
5194   // Signed integer convert to floating-point (predicated).
5195   void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5196 
5197   // Signed divide (predicated).
5198   void sdiv(const ZRegister& zd,
5199             const PRegisterM& pg,
5200             const ZRegister& zn,
5201             const ZRegister& zm);
5202 
5203   // Signed reversed divide (predicated).
5204   void sdivr(const ZRegister& zd,
5205              const PRegisterM& pg,
5206              const ZRegister& zn,
5207              const ZRegister& zm);
5208 
5209   // Signed dot product by indexed quadtuplet.
5210   void sdot(const ZRegister& zda,
5211             const ZRegister& zn,
5212             const ZRegister& zm,
5213             int index);
5214 
5215   // Signed dot product.
5216   void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5217 
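  // Dot products widen by 4x: each lane of zda accumulates the dot product of
  // four consecutive narrower lanes. A sketch (assuming an `Assembler masm`):
  //
  //   masm.sdot(z0.VnS(), z1.VnB(), z2.VnB());  // z0.s[i] += dot of quad i.
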
5218   // Conditionally select elements from two predicates.
5219   void sel(const PRegisterWithLaneSize& pd,
5220            const PRegister& pg,
5221            const PRegisterWithLaneSize& pn,
5222            const PRegisterWithLaneSize& pm);
5223 
5224   // Conditionally select elements from two vectors.
5225   void sel(const ZRegister& zd,
5226            const PRegister& pg,
5227            const ZRegister& zn,
5228            const ZRegister& zm);
5229 
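  // For example (a sketch, assuming an `Assembler masm`):
  //
  //   // z0.s = p0 ? z1.s : z2.s, element by element:
  //   masm.sel(z0.VnS(), p0, z1.VnS(), z2.VnS());
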
5230   // Initialise the first-fault register to all true.
5231   void setffr();
5232 
5233   // Signed maximum vectors (predicated).
5234   void smax(const ZRegister& zd,
5235             const PRegisterM& pg,
5236             const ZRegister& zn,
5237             const ZRegister& zm);
5238 
5239   // Signed maximum with immediate (unpredicated).
5240   void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5241 
5242   // Signed maximum reduction to scalar.
5243   void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5244 
5245   // Signed minimum vectors (predicated).
5246   void smin(const ZRegister& zd,
5247             const PRegisterM& pg,
5248             const ZRegister& zn,
5249             const ZRegister& zm);
5250 
5251   // Signed minimum with immediate (unpredicated).
5252   void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5253 
5254   // Signed minimum reduction to scalar.
5255   void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5256 
5257   // Signed multiply returning high half (predicated).
5258   void smulh(const ZRegister& zd,
5259              const PRegisterM& pg,
5260              const ZRegister& zn,
5261              const ZRegister& zm);
5262 
5263   // Splice two vectors under predicate control.
5264   void splice(const ZRegister& zd,
5265               const PRegister& pg,
5266               const ZRegister& zn,
5267               const ZRegister& zm);
5268 
5269   // Signed saturating add vectors (unpredicated).
5270   void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5271 
5272   // Signed saturating add immediate (unpredicated).
5273   void sqadd(const ZRegister& zd,
5274              const ZRegister& zn,
5275              int imm8,
5276              int shift = -1);
5277 
5278   // Signed saturating decrement scalar by multiple of 8-bit predicate
5279   // constraint element count.
5280   void sqdecb(const Register& xd,
5281               const Register& wn,
5282               int pattern,
5283               int multiplier);
5284 
5285   // Signed saturating decrement scalar by multiple of 8-bit predicate
5286   // constraint element count.
5287   void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5288 
5289   // Signed saturating decrement scalar by multiple of 64-bit predicate
5290   // constraint element count.
5291   void sqdecd(const Register& xd,
5292               const Register& wn,
5293               int pattern = SVE_ALL,
5294               int multiplier = 1);
5295 
5296   // Signed saturating decrement scalar by multiple of 64-bit predicate
5297   // constraint element count.
5298   void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5299 
5300   // Signed saturating decrement vector by multiple of 64-bit predicate
5301   // constraint element count.
5302   void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5303 
5304   // Signed saturating decrement scalar by multiple of 16-bit predicate
5305   // constraint element count.
5306   void sqdech(const Register& xd,
5307               const Register& wn,
5308               int pattern = SVE_ALL,
5309               int multiplier = 1);
5310 
5311   // Signed saturating decrement scalar by multiple of 16-bit predicate
5312   // constraint element count.
5313   void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5314 
5315   // Signed saturating decrement vector by multiple of 16-bit predicate
5316   // constraint element count.
5317   void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5318 
5319   // Signed saturating decrement scalar by active predicate element count.
5320   void sqdecp(const Register& xd,
5321               const PRegisterWithLaneSize& pg,
5322               const Register& wn);
5323 
5324   // Signed saturating decrement scalar by active predicate element count.
5325   void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5326 
5327   // Signed saturating decrement vector by active predicate element count.
5328   void sqdecp(const ZRegister& zdn, const PRegister& pg);
5329 
5330   // Signed saturating decrement scalar by multiple of 32-bit predicate
5331   // constraint element count.
5332   void sqdecw(const Register& xd,
5333               const Register& wn,
5334               int pattern = SVE_ALL,
5335               int multiplier = 1);
5336 
5337   // Signed saturating decrement scalar by multiple of 32-bit predicate
5338   // constraint element count.
5339   void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5340 
5341   // Signed saturating decrement vector by multiple of 32-bit predicate
5342   // constraint element count.
5343   void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5344 
5345   // Signed saturating increment scalar by multiple of 8-bit predicate
5346   // constraint element count.
5347   void sqincb(const Register& xd,
5348               const Register& wn,
5349               int pattern = SVE_ALL,
5350               int multiplier = 1);
5351 
5352   // Signed saturating increment scalar by multiple of 8-bit predicate
5353   // constraint element count.
5354   void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5355 
5356   // Signed saturating increment scalar by multiple of 64-bit predicate
5357   // constraint element count.
5358   void sqincd(const Register& xd,
5359               const Register& wn,
5360               int pattern,
5361               int multiplier);
5362 
5363   // Signed saturating increment scalar by multiple of 64-bit predicate
5364   // constraint element count.
5365   void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5366 
5367   // Signed saturating increment vector by multiple of 64-bit predicate
5368   // constraint element count.
5369   void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5370 
5371   // Signed saturating increment scalar by multiple of 16-bit predicate
5372   // constraint element count.
5373   void sqinch(const Register& xd,
5374               const Register& wn,
5375               int pattern = SVE_ALL,
5376               int multiplier = 1);
5377 
5378   // Signed saturating increment scalar by multiple of 16-bit predicate
5379   // constraint element count.
5380   void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5381 
5382   // Signed saturating increment vector by multiple of 16-bit predicate
5383   // constraint element count.
5384   void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5385 
5386   // Signed saturating increment scalar by active predicate element count.
5387   void sqincp(const Register& xd,
5388               const PRegisterWithLaneSize& pg,
5389               const Register& wn);
5390 
5391   // Signed saturating increment scalar by active predicate element count.
5392   void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5393 
5394   // Signed saturating increment vector by active predicate element count.
5395   void sqincp(const ZRegister& zdn, const PRegister& pg);
5396 
5397   // Signed saturating increment scalar by multiple of 32-bit predicate
5398   // constraint element count.
5399   void sqincw(const Register& xd,
5400               const Register& wn,
5401               int pattern = SVE_ALL,
5402               int multiplier = 1);
5403 
5404   // Signed saturating increment scalar by multiple of 32-bit predicate
5405   // constraint element count.
5406   void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5407 
5408   // Signed saturating increment vector by multiple of 32-bit predicate
5409   // constraint element count.
5410   void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5411 
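  // Saturating counts clamp instead of wrapping. A sketch (assuming an
  // `Assembler masm`):
  //
  //   masm.sqincw(x0, SVE_ALL, 4);  // x0 += 4 * (.w element count), saturating.
  //   masm.sqdecp(x1, p0.VnB());    // x1 -= active .b elements, saturating.
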
5412   // Signed saturating subtract vectors (unpredicated).
5413   void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5414 
5415   // Signed saturating subtract immediate (unpredicated).
5416   void sqsub(const ZRegister& zd,
5417              const ZRegister& zn,
5418              int imm8,
5419              int shift = -1);
5420 
5421   // Contiguous/scatter store bytes from vector.
5422   void st1b(const ZRegister& zt,
5423             const PRegister& pg,
5424             const SVEMemOperand& addr);
5425 
5426   // Contiguous/scatter store halfwords from vector.
5427   void st1h(const ZRegister& zt,
5428             const PRegister& pg,
5429             const SVEMemOperand& addr);
5430 
5431   // Contiguous/scatter store words from vector.
5432   void st1w(const ZRegister& zt,
5433             const PRegister& pg,
5434             const SVEMemOperand& addr);
5435 
5436   // Contiguous/scatter store doublewords from vector.
5437   void st1d(const ZRegister& zt,
5438             const PRegister& pg,
5439             const SVEMemOperand& addr);
5440 
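  // Stores can narrow: st1b from wider lanes writes only the low byte of each
  // element. A sketch (assuming an `Assembler masm`):
  //
  //   masm.st1w(z0.VnS(), p0, SVEMemOperand(x0));  // Store whole .s lanes.
  //   masm.st1b(z1.VnS(), p0, SVEMemOperand(x0, 1, SVE_MUL_VL));  // Low bytes.
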
5441   // Contiguous store two-byte structures from two vectors.
5442   void st2b(const ZRegister& zt1,
5443             const ZRegister& zt2,
5444             const PRegister& pg,
5445             const SVEMemOperand& addr);
5446 
5447   // Contiguous store two-halfword structures from two vectors.
5448   void st2h(const ZRegister& zt1,
5449             const ZRegister& zt2,
5450             const PRegister& pg,
5451             const SVEMemOperand& addr);
5452 
5453   // Contiguous store two-word structures from two vectors.
5454   void st2w(const ZRegister& zt1,
5455             const ZRegister& zt2,
5456             const PRegister& pg,
5457             const SVEMemOperand& addr);
5458 
5459   // Contiguous store two-doubleword structures from two vectors.
5460   void st2d(const ZRegister& zt1,
5461             const ZRegister& zt2,
5462             const PRegister& pg,
5463             const SVEMemOperand& addr);
5464 
5465   // Contiguous store three-byte structures from three vectors.
5466   void st3b(const ZRegister& zt1,
5467             const ZRegister& zt2,
5468             const ZRegister& zt3,
5469             const PRegister& pg,
5470             const SVEMemOperand& addr);
5471 
5472   // Contiguous store three-halfword structures from three vectors.
5473   void st3h(const ZRegister& zt1,
5474             const ZRegister& zt2,
5475             const ZRegister& zt3,
5476             const PRegister& pg,
5477             const SVEMemOperand& addr);
5478 
5479   // Contiguous store three-word structures from three vectors.
5480   void st3w(const ZRegister& zt1,
5481             const ZRegister& zt2,
5482             const ZRegister& zt3,
5483             const PRegister& pg,
5484             const SVEMemOperand& addr);
5485 
5486   // Contiguous store three-doubleword structures from three vectors.
5487   void st3d(const ZRegister& zt1,
5488             const ZRegister& zt2,
5489             const ZRegister& zt3,
5490             const PRegister& pg,
5491             const SVEMemOperand& addr);
5492 
5493   // Contiguous store four-byte structures from four vectors.
5494   void st4b(const ZRegister& zt1,
5495             const ZRegister& zt2,
5496             const ZRegister& zt3,
5497             const ZRegister& zt4,
5498             const PRegister& pg,
5499             const SVEMemOperand& addr);
5500 
5501   // Contiguous store four-halfword structures from four vectors.
5502   void st4h(const ZRegister& zt1,
5503             const ZRegister& zt2,
5504             const ZRegister& zt3,
5505             const ZRegister& zt4,
5506             const PRegister& pg,
5507             const SVEMemOperand& addr);
5508 
5509   // Contiguous store four-word structures from four vectors.
5510   void st4w(const ZRegister& zt1,
5511             const ZRegister& zt2,
5512             const ZRegister& zt3,
5513             const ZRegister& zt4,
5514             const PRegister& pg,
5515             const SVEMemOperand& addr);
5516 
5517   // Contiguous store four-doubleword structures from four vectors.
5518   void st4d(const ZRegister& zt1,
5519             const ZRegister& zt2,
5520             const ZRegister& zt3,
5521             const ZRegister& zt4,
5522             const PRegister& pg,
5523             const SVEMemOperand& addr);
5524 
  // Contiguous store non-temporal bytes from vector.
  void stnt1b(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal halfwords from vector.
  void stnt1h(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal words from vector.
  void stnt1w(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Contiguous store non-temporal doublewords from vector.
  void stnt1d(const ZRegister& zt,
              const PRegister& pg,
              const SVEMemOperand& addr);

  // Store SVE predicate/vector register.
  void str(const CPURegister& rt, const SVEMemOperand& addr);

  // Subtract vectors (predicated).
  void sub(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Subtract vectors (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Subtract immediate (unpredicated).
  void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Reversed subtract vectors (predicated).
  void subr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Reversed subtract from immediate (unpredicated).
  void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);

  // Signed unpack and extend high half of vector.
  void sunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Signed unpack and extend low half of vector.
  void sunpklo(const ZRegister& zd, const ZRegister& zn);

  // Signed byte extend (predicated).
  void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed halfword extend (predicated).
  void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed word extend (predicated).
  void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Programmable table lookup/permute using vector of indices into a vector.
  void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave even or odd elements from two predicates.
  void trn1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave even or odd elements from two vectors.
  void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave even or odd elements from two predicates.
  void trn2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave even or odd elements from two vectors.
  void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned absolute difference (predicated).
  void uabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned add reduction to scalar.
  void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Unsigned integer convert to floating-point (predicated).
  void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned divide (predicated).
  void udiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned reversed divide (predicated).
  void udivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned dot product by indexed quadtuplet.
  void udot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Unsigned dot product.
  void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned maximum vectors (predicated).
  void umax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned maximum with immediate (unpredicated).
  void umax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned maximum reduction to scalar.
  void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned minimum vectors (predicated).
  void umin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Unsigned minimum with immediate (unpredicated).
  void umin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Unsigned minimum reduction to scalar.
  void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Unsigned multiply returning high half (predicated).
  void umulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Unsigned saturating add vectors (unpredicated).
  void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating add immediate (unpredicated).
  void uqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Unsigned saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count.
  void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement scalar by active predicate element count.
  void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating decrement vector by active predicate element count.
  void uqdecp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment scalar by active predicate element count.
  void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Unsigned saturating increment vector by active predicate element count.
  void uqincp(const ZRegister& zdn, const PRegister& pg);

  // Unsigned saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Unsigned saturating subtract vectors (unpredicated).
  void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Unsigned saturating subtract immediate (unpredicated).
  void uqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);

  // Unsigned unpack and extend high half of vector.
  void uunpkhi(const ZRegister& zd, const ZRegister& zn);

  // Unsigned unpack and extend low half of vector.
  void uunpklo(const ZRegister& zd, const ZRegister& zn);

  // Unsigned byte extend (predicated).
  void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned halfword extend (predicated).
  void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Unsigned word extend (predicated).
  void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Concatenate even or odd elements from two predicates.
  void uzp1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate even or odd elements from two vectors.
  void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Concatenate even or odd elements from two predicates.
  void uzp2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Concatenate even or odd elements from two vectors.
  void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // While incrementing signed scalar less than or equal to scalar.
  void whilele(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower than scalar.
  void whilelo(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing unsigned scalar lower or same as scalar.
  void whilels(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While incrementing signed scalar less than scalar.
  void whilelt(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Write the first-fault register.
  void wrffr(const PRegisterWithLaneSize& pn);

  // Interleave elements from two half predicates.
  void zip1(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from two half vectors.
  void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Interleave elements from two half predicates.
  void zip2(const PRegisterWithLaneSize& pd,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Interleave elements from two half vectors.
  void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }
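
  // Illustrative usage sketch (added for this edit, not part of the original
  // header): emitting inline data with the helpers above, assuming `masm` is
  // an Assembler with assembly currently allowed:
  //
  //   masm.dci(0xd503201f);          // Emit a raw instruction word (nop).
  //   masm.dc32(0xdeadbeef);         // Four bytes of literal data.
  //   masm.dc64(UINT64_C(0x0123456789abcdef));
  //   masm.EmitString("build-id");   // NUL-terminated, then realigned so
  //                                  // instruction emission can resume.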

  // Code generation helpers.

  // Register encoding.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  static Instr RmSP(Register rm) {
    VIXL_ASSERT(!rm.IsZero());
    return (rm.GetCode() & kRegCodeMask) << Rm_offset;
  }

  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }

  // Flags encoding.
  static Instr Flags(FlagsUpdate S) {
    if (S == SetFlags) {
      return 1 << FlagsUpdate_offset;
    } else if (S == LeaveFlags) {
      return 0 << FlagsUpdate_offset;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // Generic immediate encoding.
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediates in a satisfactory way.
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }

  // PC-relative address encoding.
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }
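
  // Worked example (added for clarity, not in the original source): adr-style
  // instructions split the 21-bit signed offset into immlo (bits [1:0]) and
  // immhi (bits [20:2]). For imm21 = 0x1005, immlo = 0b01 and immhi = 0x401,
  // which ImmPCRelAddress() places at ImmPCRelLo_offset and ImmPCRelHi_offset
  // respectively.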

  // Branch encoding.
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
    b5 &= ImmTestBranchBit5_mask;
    b40 &= ImmTestBranchBit40_mask;
    return b5 | b40;
  }
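
  // Worked example (added for clarity): for tbz/tbnz, bit_pos = 37 (0b100101)
  // splits into b5 = 1 and b40 = 0b00101, selecting bit 37 of an X register;
  // bit positions below 32 leave b5 clear and fit a W register.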

  // Data Processing encoding.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }

  static Instr ImmAddSub(int imm) {
    VIXL_ASSERT(IsImmAddSub(imm));
    if (IsUint12(imm)) {  // No shift required.
      imm <<= ImmAddSub_offset;
    } else {
      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
    }
    return imm;
  }
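
  // Worked example (added for clarity): IsImmAddSub() accepts a 12-bit
  // unsigned value, optionally shifted left by 12. So 0xfff encodes directly,
  // 0x1000 encodes as 0x1 with the shift bit set, and 0x1001 is not
  // encodable, so callers must synthesize it some other way.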

  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }

  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }
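
  // Worked example (added for clarity): an unsigned byte load into word
  // elements has msize_log2 = 0 and esize_log2 = 2, giving dtype_h = 0 and
  // dtype_l = 2. The signed form XORs both fields with 3, giving dtype_h = 3
  // and dtype_l = 1, one of the encodings where msize would exceed esize.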

  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }

  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }

  static Instr ImmR(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    VIXL_ASSERT(IsUint6(immr));
    return immr << ImmR_offset;
  }

  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
    int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }
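
  // Worked example (added for clarity): for a pair of X registers
  // (access_size_in_bytes_log2 = 3), a byte offset of 64 encodes as
  // scaled_imm7 = 8; offsets that are not multiples of the access size are
  // not representable in the paired addressing modes.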

  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }
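
  // Worked example (added for clarity): PAC loads/stores scale the offset by
  // 8. An offset of -8 scales to -1, whose top (tenth) bit lands in
  // ImmLSPACHi and whose low nine bits (0x1ff) land in ImmLSPACLo.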

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }
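
  // Worked example (added for clarity): `movz x0, #0xbeef, lsl #16` is
  // encoded with ImmMoveWide(0xbeef) and ShiftMoveWide(1), since the two-bit
  // shift field counts 16-bit steps (0, 16, 32 or 48).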

  // FP Immediates.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  static Instr FPType(VRegister fd) {
    VIXL_ASSERT(fd.IsScalar());
    switch (fd.GetSizeInBits()) {
      case 16:
        return FP16;
      case 32:
        return FP32;
      case 64:
        return FP64;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
  }

  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);

  // Instruction bits for vector format in data processing operations.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for vector format in floating point data processing
  // operations.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  // Instruction bits for vector format in load and store operations.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }

  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
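
  // Worked example (added for clarity): a 3-bit lane index of 5 (0b101)
  // splits into H = 1, L = 0, M = 1; with num_bits == 2 the M bit is forced
  // to zero and only H:L carry the index.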

  static Instr ImmRotFcadd(int rot) {
    VIXL_ASSERT(rot == 90 || rot == 270);
    return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
  }

  static Instr ImmRotFcmlaSca(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaSca_offset;
  }

  static Instr ImmRotFcmlaVec(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaVec_offset;
  }

  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }
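
  // Worked example (added for clarity): ImmNEON5 marks the lane size with a
  // trailing one-bit below the index. For S lanes (s = 2) and index 1,
  // imm5 = (1 << 3) | (1 << 2) = 0b01100; ImmNEON4 omits the marker and
  // simply shifts the index left by s.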

  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }
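
  // Worked example (added for clarity): imm8 = 0b10110101 splits into
  // abc = 0b101 (top three bits) and defgh = 0b10101 (low five bits),
  // matching the a:b:c:d:e:f:g:h fields of NEON modified-immediate forms.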

  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }

  // Size of the code generated from the bound label to the current position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
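
  // Illustrative usage sketch (added for this edit, not part of the original
  // header), assuming `masm` is an Assembler and the label is bound before
  // the query:
  //
  //   Label start;
  //   masm.bind(&start);
  //   masm.add(x0, x0, Operand(1));
  //   size_t emitted = masm.GetSizeOfCodeGeneratedSince(&start);  // 4 bytes.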

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // `is_load` defaults to false, as it is only used in the scalar-plus-vector
  // form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference, but gcc
  // 4.8.2 reports a bogus uninitialised warning if it is.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  Instr EncodeSVEShiftImmediate(Shift shift_op,
                                int shift,
                                int lane_size_in_bits);

  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                SVEBitwiseShiftUnpredicatedOp op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    SVEBitwiseShiftByImm_PredicatedOp op);

  Instr SVEFPMulIndexHelper(unsigned lane_size_in_bytes_log2,
                            const ZRegister& zm,
                            int index,
                            Instr op_h,
                            Instr op_s,
                            Instr op_d);

  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);

  static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
    // SVE only supports PLD and PST, not PLI.
    VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
                ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
    // Check that we can simply map bits.
    VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
    VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
    // Remaining operations map directly.
    return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
  }
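
  // Worked example (added for clarity, assuming the architectural prfop
  // encodings where PSTL1STRM == 0b10001): PSTL1STRM maps to 0b1001, since
  // the PLD/PST selector moves from bit 4 down to bit 3 while the level and
  // policy bits pass through unchanged.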

  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Convenience pass-through for CPU feature checks.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the
  // same entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  bool CPUHas(SystemRegister sysreg) const;

 private:
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
6983   void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
6984 
  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // holds and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);
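  // For example (illustrative values): *imm8 == 0x3f00 with *shift == -1
  // resolves to *imm8 == 0x3f and *shift == 8, since 0x3f00 is an int8
  // value shifted left by 8.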

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);
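  // For instance (illustrative): [x0, #8] accessed with
  // access_size_in_bytes_log2 == 3 can use the scaled unsigned-offset form
  // (the offset encodes as 8 >> 3 == 1), whereas [x0, #9] would need the
  // unscaled-offset form.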

  // Link the current (not-yet-emitted) instruction to the specified label,
  // then return an offset to be encoded in the instruction. If the label is
  // not yet bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);
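  // For a label that is still unbound, the instruction's own offset is
  // recorded in the label's link set and the encoded field is patched when
  // the label is bound (see the Label class above).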

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);
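  // Note: the expected instantiations (an assumption based on the helper
  // names above) are element_shift == 0 for byte offsets,
  // kInstructionSizeLog2 for instruction offsets, and kPageSizeLog2 for
  // page offsets.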

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  PositionIndependentCodeOption pic_;

  CPUFeatures cpu_features_;
};


template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}
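
// A minimal usage sketch (illustrative; `assm` and `lit` are hypothetical
// names, and the literal must already have been placed in the buffer):
//
//   Literal<uint64_t> lit(0);              // Placeholder value.
//   assm.ldr(x0, &lit);                    // PC-relative literal load.
//   ...
//   lit.UpdateValue(UINT64_C(42), &assm);  // Patch the value in the pool.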


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file.  Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
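// For label links, each element of the set is simply a link offset into the
// code buffer, so every element acts as its own key.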
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_