1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29 
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 #include "operands-aarch64.h"
37 
38 namespace vixl {
39 namespace aarch64 {
40 
41 class LabelTestHelper;  // Forward declaration.
42 
43 
44 class Label {
45  public:
Label()46   Label() : location_(kLocationUnbound) {}
~Label()47   ~Label() {
48     // All links to a label must have been resolved before it is destructed.
49     VIXL_ASSERT(!IsLinked());
50   }
51 
IsBound()52   bool IsBound() const { return location_ >= 0; }
IsLinked()53   bool IsLinked() const { return !links_.empty(); }
54 
GetLocation()55   ptrdiff_t GetLocation() const { return location_; }
56   VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
57     return GetLocation();
58   }
59 
60   static const int kNPreallocatedLinks = 4;
61   static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
62   static const size_t kReclaimFrom = 512;
63   static const size_t kReclaimFactor = 2;
64 
65   typedef InvalSet<ptrdiff_t,
66                    kNPreallocatedLinks,
67                    ptrdiff_t,
68                    kInvalidLinkKey,
69                    kReclaimFrom,
70                    kReclaimFactor>
71       LinksSetBase;
72   typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
73 
74  private:
75   class LinksSet : public LinksSetBase {
76    public:
LinksSet()77     LinksSet() : LinksSetBase() {}
78   };
79 
80   // Allows iterating over the links of a label. The behaviour is undefined if
81   // the list of links is modified in any way while iterating.
82   class LabelLinksIterator : public LabelLinksIteratorBase {
83    public:
LabelLinksIterator(Label * label)84     explicit LabelLinksIterator(Label* label)
85         : LabelLinksIteratorBase(&label->links_) {}
86 
87     // TODO: Remove these and use the STL-like interface instead.
88     using LabelLinksIteratorBase::Advance;
89     using LabelLinksIteratorBase::Current;
90   };
91 
Bind(ptrdiff_t location)92   void Bind(ptrdiff_t location) {
93     // Labels can only be bound once.
94     VIXL_ASSERT(!IsBound());
95     location_ = location;
96   }
97 
AddLink(ptrdiff_t instruction)98   void AddLink(ptrdiff_t instruction) {
99     // If a label is bound, the assembler already has the information it needs
100     // to write the instruction, so there is no need to add it to links_.
101     VIXL_ASSERT(!IsBound());
102     links_.insert(instruction);
103   }
104 
DeleteLink(ptrdiff_t instruction)105   void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }
106 
ClearAllLinks()107   void ClearAllLinks() { links_.clear(); }
108 
109   // TODO: The comment below considers average case complexity for our
110   // usual use-cases. The elements of interest are:
111   // - Branches to a label are emitted in order: branch instructions to a label
112   // are generated at an offset in the code generation buffer greater than any
113   // other branch to that same label already generated. As an example, this can
114   // be broken when an instruction is patched to become a branch. Note that the
115   // code will still work, but the complexity considerations below may locally
116   // not apply any more.
117   // - Veneers are generated in order: for multiple branches of the same type
118   // branching to the same unbound label going out of range, veneers are
119   // generated in growing order of the branch instruction offset from the start
120   // of the buffer.
121   //
122   // When creating a veneer for a branch going out of range, the link for this
123   // branch needs to be removed from this `links_`. Since all branches are
124   // tracked in one underlying InvalSet, the complexity for this deletion is the
125   // same as for finding the element, ie. O(n), where n is the number of links
126   // in the set.
127   // This could be reduced to O(1) by using the same trick as used when tracking
128   // branch information for veneers: split the container to use one set per type
129   // of branch. With that setup, when a veneer is created and the link needs to
130   // be deleted, if the two points above hold, it must be the minimum element of
131   // the set for its type of branch, and that minimum element will be accessible
132   // in O(1).
133 
134   // The offsets of the instructions that have linked to this label.
135   LinksSet links_;
136   // The label location.
137   ptrdiff_t location_;
138 
139   static const ptrdiff_t kLocationUnbound = -1;
140 
141 // It is not safe to copy labels, so disable the copy constructor and operator
142 // by declaring them private (without an implementation).
143 #if __cplusplus >= 201103L
144   Label(const Label&) = delete;
145   void operator=(const Label&) = delete;
146 #else
147   Label(const Label&);
148   void operator=(const Label&);
149 #endif
150 
151   // The Assembler class is responsible for binding and linking labels, since
152   // the stored offsets need to be consistent with the Assembler's buffer.
153   friend class Assembler;
154   // The MacroAssembler and VeneerPool handle resolution of branches to distant
155   // targets.
156   friend class MacroAssembler;
157   friend class VeneerPool;
158 };
159 
160 
161 class Assembler;
162 class LiteralPool;
163 
164 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
165 // stream and loaded through a pc relative load. The same literal can be
166 // referred to by multiple instructions but a literal can only reside at one
167 // place in memory. A literal can be used by a load before or after being
168 // placed in memory.
169 //
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed and offset is the offset of the last load
//     using the literal (stored as -offset -1). If multiple loads refer to
//     this literal then the last load holds the offset of the preceding load
//     and all loads form a chain. Once the literal is placed all the loads in
//     the chain are resolved and future loads fall back to possibility 1.
179 class RawLiteral {
180  public:
181   enum DeletionPolicy {
182     kDeletedOnPlacementByPool,
183     kDeletedOnPoolDestruction,
184     kManuallyDeleted
185   };
186 
187   RawLiteral(size_t size,
188              LiteralPool* literal_pool,
189              DeletionPolicy deletion_policy = kManuallyDeleted);
190 
191   // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
192   // actually pointing to `Literal<T>` objects.
~RawLiteral()193   virtual ~RawLiteral() {}
194 
GetSize()195   size_t GetSize() const {
196     VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
197     VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
198     VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
199                 (size_ == kQRegSizeInBytes));
200     return size_;
201   }
202   VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }
203 
GetRawValue128Low64()204   uint64_t GetRawValue128Low64() const {
205     VIXL_ASSERT(size_ == kQRegSizeInBytes);
206     return low64_;
207   }
208   VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
209     return GetRawValue128Low64();
210   }
211 
GetRawValue128High64()212   uint64_t GetRawValue128High64() const {
213     VIXL_ASSERT(size_ == kQRegSizeInBytes);
214     return high64_;
215   }
216   VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
217     return GetRawValue128High64();
218   }
219 
GetRawValue64()220   uint64_t GetRawValue64() const {
221     VIXL_ASSERT(size_ == kXRegSizeInBytes);
222     VIXL_ASSERT(high64_ == 0);
223     return low64_;
224   }
225   VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
226     return GetRawValue64();
227   }
228 
GetRawValue32()229   uint32_t GetRawValue32() const {
230     VIXL_ASSERT(size_ == kWRegSizeInBytes);
231     VIXL_ASSERT(high64_ == 0);
232     VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
233     return static_cast<uint32_t>(low64_);
234   }
235   VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
236     return GetRawValue32();
237   }
238 
IsUsed()239   bool IsUsed() const { return offset_ < 0; }
IsPlaced()240   bool IsPlaced() const { return offset_ > 0; }
241 
GetLiteralPool()242   LiteralPool* GetLiteralPool() const { return literal_pool_; }
243 
GetOffset()244   ptrdiff_t GetOffset() const {
245     VIXL_ASSERT(IsPlaced());
246     return offset_ - 1;
247   }
248   VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }
249 
250  protected:
SetOffset(ptrdiff_t offset)251   void SetOffset(ptrdiff_t offset) {
252     VIXL_ASSERT(offset >= 0);
253     VIXL_ASSERT(IsWordAligned(offset));
254     VIXL_ASSERT(!IsPlaced());
255     offset_ = offset + 1;
256   }
set_offset(ptrdiff_t offset)257   VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
258     SetOffset(offset);
259   }
260 
GetLastUse()261   ptrdiff_t GetLastUse() const {
262     VIXL_ASSERT(IsUsed());
263     return -offset_ - 1;
264   }
265   VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }
266 
SetLastUse(ptrdiff_t offset)267   void SetLastUse(ptrdiff_t offset) {
268     VIXL_ASSERT(offset >= 0);
269     VIXL_ASSERT(IsWordAligned(offset));
270     VIXL_ASSERT(!IsPlaced());
271     offset_ = -offset - 1;
272   }
set_last_use(ptrdiff_t offset)273   VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
274     SetLastUse(offset);
275   }
276 
277   size_t size_;
278   ptrdiff_t offset_;
279   uint64_t low64_;
280   uint64_t high64_;
281 
282  private:
283   LiteralPool* literal_pool_;
284   DeletionPolicy deletion_policy_;
285 
286   friend class Assembler;
287   friend class LiteralPool;
288 };
289 
290 
291 template <typename T>
292 class Literal : public RawLiteral {
293  public:
294   explicit Literal(T value,
295                    LiteralPool* literal_pool = NULL,
296                    RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)297       : RawLiteral(sizeof(value), literal_pool, ownership) {
298     VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
299     UpdateValue(value);
300   }
301 
302   Literal(T high64,
303           T low64,
304           LiteralPool* literal_pool = NULL,
305           RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)306       : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
307     VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
308     UpdateValue(high64, low64);
309   }
310 
~Literal()311   virtual ~Literal() {}
312 
313   // Update the value of this literal, if necessary by rewriting the value in
314   // the pool.
315   // If the literal has already been placed in a literal pool, the address of
316   // the start of the code buffer must be provided, as the literal only knows it
317   // offset from there. This also allows patching the value after the code has
318   // been moved in memory.
319   void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
320     VIXL_ASSERT(sizeof(new_value) == size_);
321     memcpy(&low64_, &new_value, sizeof(new_value));
322     if (IsPlaced()) {
323       VIXL_ASSERT(code_buffer != NULL);
324       RewriteValueInCode(code_buffer);
325     }
326   }
327 
328   void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
329     VIXL_ASSERT(sizeof(low64) == size_ / 2);
330     memcpy(&low64_, &low64, sizeof(low64));
331     memcpy(&high64_, &high64, sizeof(high64));
332     if (IsPlaced()) {
333       VIXL_ASSERT(code_buffer != NULL);
334       RewriteValueInCode(code_buffer);
335     }
336   }
337 
338   void UpdateValue(T new_value, const Assembler* assembler);
339   void UpdateValue(T high64, T low64, const Assembler* assembler);
340 
341  private:
RewriteValueInCode(uint8_t * code_buffer)342   void RewriteValueInCode(uint8_t* code_buffer) {
343     VIXL_ASSERT(IsPlaced());
344     VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
345     switch (GetSize()) {
346       case kSRegSizeInBytes:
347         *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
348             GetRawValue32();
349         break;
350       case kDRegSizeInBytes:
351         *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
352             GetRawValue64();
353         break;
354       default:
355         VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
356         uint64_t* base_address =
357             reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
358         *base_address = GetRawValue128Low64();
359         *(base_address + 1) = GetRawValue128High64();
360     }
361   }
362 };
363 
364 
365 // Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // Everything generated is position-independent: every branch and every
  // reference to a label created with the Label class is PC-relative.
  PositionIndependentCode = 0,

  // VIXL may generate code that refers to absolute addresses. Such code cannot
  // be copied and run from another address; it has to be generated in its
  // final location.
  PositionDependentCode = 1,

  // VIXL may assume that the bottom 12 bits of the address stay constant while
  // the top 48 bits may change. This lets `adrp` work in systems which copy
  // code between pages but otherwise maintain 4KB page alignment.
  PageOffsetDependentCode = 2
};
383 
384 
385 // Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Use a scaled-immediate-offset instruction when possible; otherwise fall
  // back to an unscaled-offset, register-offset, pre-index or post-index
  // instruction.
  PreferScaledOffset = 0,

  // Use an unscaled-immediate-offset instruction when possible; otherwise fall
  // back to a scaled-offset, register-offset, pre-index or post-index
  // instruction.
  PreferUnscaledOffset = 1,

  // Only scaled-immediate-offset instructions are acceptable.
  RequireScaledOffset = 2,

  // Only unscaled-immediate-offset instructions are acceptable.
  RequireUnscaledOffset = 3
};
401 
402 
403 // Assembler.
404 class Assembler : public vixl::internal::AssemblerBase {
405  public:
406   explicit Assembler(
407       PositionIndependentCodeOption pic = PositionIndependentCode)
pic_(pic)408       : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
409   explicit Assembler(
410       size_t capacity,
411       PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(capacity)412       : AssemblerBase(capacity),
413         pic_(pic),
414         cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
415   Assembler(byte* buffer,
416             size_t capacity,
417             PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(buffer,capacity)418       : AssemblerBase(buffer, capacity),
419         pic_(pic),
420         cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
421 
422   // Upon destruction, the code will assert that one of the following is true:
423   //  * The Assembler object has not been used.
424   //  * Nothing has been emitted since the last Reset() call.
425   //  * Nothing has been emitted since the last FinalizeCode() call.
~Assembler()426   ~Assembler() {}
427 
428   // System functions.
429 
430   // Start generating code from the beginning of the buffer, discarding any code
431   // and data that has already been emitted into the buffer.
432   void Reset();
433 
434   // Bind a label to the current PC.
435   void bind(Label* label);
436 
437   // Bind a label to a specified offset from the start of the buffer.
438   void BindToOffset(Label* label, ptrdiff_t offset);
439 
440   // Place a literal at the current PC.
441   void place(RawLiteral* literal);
442 
443   VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
444     return GetCursorOffset();
445   }
446 
447   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
448                   ptrdiff_t GetBufferEndOffset() const) {
449     return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
450   }
451   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
452                   ptrdiff_t BufferEndOffset() const) {
453     return GetBuffer().GetCapacity();
454   }
455 
456   // Return the address of a bound label.
457   template <typename T>
GetLabelAddress(const Label * label)458   T GetLabelAddress(const Label* label) const {
459     VIXL_ASSERT(label->IsBound());
460     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
461     return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
462   }
463 
GetInstructionAt(ptrdiff_t instruction_offset)464   Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
465     return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
466   }
467   VIXL_DEPRECATED("GetInstructionAt",
468                   Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
469     return GetInstructionAt(instruction_offset);
470   }
471 
GetInstructionOffset(Instruction * instruction)472   ptrdiff_t GetInstructionOffset(Instruction* instruction) {
473     VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
474     ptrdiff_t offset =
475         instruction - GetBuffer()->GetStartAddress<Instruction*>();
476     VIXL_ASSERT((0 <= offset) &&
477                 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
478     return offset;
479   }
480   VIXL_DEPRECATED("GetInstructionOffset",
481                   ptrdiff_t InstructionOffset(Instruction* instruction)) {
482     return GetInstructionOffset(instruction);
483   }
484 
485   // Instruction set functions.
486 
487   // Branch / Jump instructions.
488 
489   // Branch to register.
490   void br(const Register& xn);
491 
492   // Branch with link to register.
493   void blr(const Register& xn);
494 
495   // Branch to register with return hint.
496   void ret(const Register& xn = lr);
497 
498   // Branch to register, with pointer authentication. Using key A and a modifier
499   // of zero [Armv8.3].
500   void braaz(const Register& xn);
501 
502   // Branch to register, with pointer authentication. Using key B and a modifier
503   // of zero [Armv8.3].
504   void brabz(const Register& xn);
505 
506   // Branch with link to register, with pointer authentication. Using key A and
507   // a modifier of zero [Armv8.3].
508   void blraaz(const Register& xn);
509 
510   // Branch with link to register, with pointer authentication. Using key B and
511   // a modifier of zero [Armv8.3].
512   void blrabz(const Register& xn);
513 
514   // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
515   void retaa();
516 
517   // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
518   void retab();
519 
520   // Branch to register, with pointer authentication. Using key A [Armv8.3].
521   void braa(const Register& xn, const Register& xm);
522 
523   // Branch to register, with pointer authentication. Using key B [Armv8.3].
524   void brab(const Register& xn, const Register& xm);
525 
526   // Branch with link to register, with pointer authentication. Using key A
527   // [Armv8.3].
528   void blraa(const Register& xn, const Register& xm);
529 
530   // Branch with link to register, with pointer authentication. Using key B
531   // [Armv8.3].
532   void blrab(const Register& xn, const Register& xm);
533 
534   // Unconditional branch to label.
535   void b(Label* label);
536 
537   // Conditional branch to label.
538   void b(Label* label, Condition cond);
539 
540   // Unconditional branch to PC offset.
541   void b(int64_t imm26);
542 
543   // Conditional branch to PC offset.
544   void b(int64_t imm19, Condition cond);
545 
546   // Branch with link to label.
547   void bl(Label* label);
548 
549   // Branch with link to PC offset.
550   void bl(int64_t imm26);
551 
552   // Compare and branch to label if zero.
553   void cbz(const Register& rt, Label* label);
554 
555   // Compare and branch to PC offset if zero.
556   void cbz(const Register& rt, int64_t imm19);
557 
558   // Compare and branch to label if not zero.
559   void cbnz(const Register& rt, Label* label);
560 
561   // Compare and branch to PC offset if not zero.
562   void cbnz(const Register& rt, int64_t imm19);
563 
564   // Table lookup from one register.
565   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
566 
567   // Table lookup from two registers.
568   void tbl(const VRegister& vd,
569            const VRegister& vn,
570            const VRegister& vn2,
571            const VRegister& vm);
572 
573   // Table lookup from three registers.
574   void tbl(const VRegister& vd,
575            const VRegister& vn,
576            const VRegister& vn2,
577            const VRegister& vn3,
578            const VRegister& vm);
579 
580   // Table lookup from four registers.
581   void tbl(const VRegister& vd,
582            const VRegister& vn,
583            const VRegister& vn2,
584            const VRegister& vn3,
585            const VRegister& vn4,
586            const VRegister& vm);
587 
588   // Table lookup extension from one register.
589   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
590 
591   // Table lookup extension from two registers.
592   void tbx(const VRegister& vd,
593            const VRegister& vn,
594            const VRegister& vn2,
595            const VRegister& vm);
596 
597   // Table lookup extension from three registers.
598   void tbx(const VRegister& vd,
599            const VRegister& vn,
600            const VRegister& vn2,
601            const VRegister& vn3,
602            const VRegister& vm);
603 
604   // Table lookup extension from four registers.
605   void tbx(const VRegister& vd,
606            const VRegister& vn,
607            const VRegister& vn2,
608            const VRegister& vn3,
609            const VRegister& vn4,
610            const VRegister& vm);
611 
612   // Test bit and branch to label if zero.
613   void tbz(const Register& rt, unsigned bit_pos, Label* label);
614 
615   // Test bit and branch to PC offset if zero.
616   void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
617 
618   // Test bit and branch to label if not zero.
619   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
620 
621   // Test bit and branch to PC offset if not zero.
622   void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
623 
624   // Address calculation instructions.
625   // Calculate a PC-relative address. Unlike for branches the offset in adr is
626   // unscaled (i.e. the result can be unaligned).
627 
628   // Calculate the address of a label.
629   void adr(const Register& xd, Label* label);
630 
631   // Calculate the address of a PC offset.
632   void adr(const Register& xd, int64_t imm21);
633 
634   // Calculate the page address of a label.
635   void adrp(const Register& xd, Label* label);
636 
637   // Calculate the page address of a PC offset.
638   void adrp(const Register& xd, int64_t imm21);
639 
640   // Data Processing instructions.
641 
642   // Add.
643   void add(const Register& rd, const Register& rn, const Operand& operand);
644 
645   // Add and update status flags.
646   void adds(const Register& rd, const Register& rn, const Operand& operand);
647 
648   // Compare negative.
649   void cmn(const Register& rn, const Operand& operand);
650 
651   // Subtract.
652   void sub(const Register& rd, const Register& rn, const Operand& operand);
653 
654   // Subtract and update status flags.
655   void subs(const Register& rd, const Register& rn, const Operand& operand);
656 
657   // Compare.
658   void cmp(const Register& rn, const Operand& operand);
659 
660   // Negate.
661   void neg(const Register& rd, const Operand& operand);
662 
663   // Negate and update status flags.
664   void negs(const Register& rd, const Operand& operand);
665 
666   // Add with carry bit.
667   void adc(const Register& rd, const Register& rn, const Operand& operand);
668 
669   // Add with carry bit and update status flags.
670   void adcs(const Register& rd, const Register& rn, const Operand& operand);
671 
672   // Subtract with carry bit.
673   void sbc(const Register& rd, const Register& rn, const Operand& operand);
674 
675   // Subtract with carry bit and update status flags.
676   void sbcs(const Register& rd, const Register& rn, const Operand& operand);
677 
678   // Rotate register right and insert into NZCV flags under the control of a
679   // mask [Armv8.4].
680   void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
681 
682   // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
683   void setf8(const Register& rn);
684 
  // Set NZCV flags from register, treated as a 16-bit value [Armv8.4].
686   void setf16(const Register& rn);
687 
688   // Negate with carry bit.
689   void ngc(const Register& rd, const Operand& operand);
690 
691   // Negate with carry bit and update status flags.
692   void ngcs(const Register& rd, const Operand& operand);
693 
694   // Logical instructions.
695 
696   // Bitwise and (A & B).
697   void and_(const Register& rd, const Register& rn, const Operand& operand);
698 
699   // Bitwise and (A & B) and update status flags.
700   void ands(const Register& rd, const Register& rn, const Operand& operand);
701 
702   // Bit test and set flags.
703   void tst(const Register& rn, const Operand& operand);
704 
705   // Bit clear (A & ~B).
706   void bic(const Register& rd, const Register& rn, const Operand& operand);
707 
708   // Bit clear (A & ~B) and update status flags.
709   void bics(const Register& rd, const Register& rn, const Operand& operand);
710 
711   // Bitwise or (A | B).
712   void orr(const Register& rd, const Register& rn, const Operand& operand);
713 
  // Bitwise or-not (A | ~B).
715   void orn(const Register& rd, const Register& rn, const Operand& operand);
716 
717   // Bitwise eor/xor (A ^ B).
718   void eor(const Register& rd, const Register& rn, const Operand& operand);
719 
720   // Bitwise enor/xnor (A ^ ~B).
721   void eon(const Register& rd, const Register& rn, const Operand& operand);
722 
723   // Logical shift left by variable.
724   void lslv(const Register& rd, const Register& rn, const Register& rm);
725 
726   // Logical shift right by variable.
727   void lsrv(const Register& rd, const Register& rn, const Register& rm);
728 
729   // Arithmetic shift right by variable.
730   void asrv(const Register& rd, const Register& rn, const Register& rm);
731 
732   // Rotate right by variable.
733   void rorv(const Register& rd, const Register& rn, const Register& rm);
734 
735   // Bitfield instructions.
736 
737   // Bitfield move.
738   void bfm(const Register& rd,
739            const Register& rn,
740            unsigned immr,
741            unsigned imms);
742 
743   // Signed bitfield move.
744   void sbfm(const Register& rd,
745             const Register& rn,
746             unsigned immr,
747             unsigned imms);
748 
749   // Unsigned bitfield move.
750   void ubfm(const Register& rd,
751             const Register& rn,
752             unsigned immr,
753             unsigned imms);
754 
755   // Bfm aliases.
756 
757   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)758   void bfi(const Register& rd,
759            const Register& rn,
760            unsigned lsb,
761            unsigned width) {
762     VIXL_ASSERT(width >= 1);
763     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
764     bfm(rd,
765         rn,
766         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
767         width - 1);
768   }
769 
770   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)771   void bfxil(const Register& rd,
772              const Register& rn,
773              unsigned lsb,
774              unsigned width) {
775     VIXL_ASSERT(width >= 1);
776     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
777     bfm(rd, rn, lsb, lsb + width - 1);
778   }
779 
780   // Bitfield clear [Armv8.2].
bfc(const Register & rd,unsigned lsb,unsigned width)781   void bfc(const Register& rd, unsigned lsb, unsigned width) {
782     bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
783   }
784 
785   // Sbfm aliases.
786 
787   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)788   void asr(const Register& rd, const Register& rn, unsigned shift) {
789     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
790     sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
791   }
792 
793   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)794   void sbfiz(const Register& rd,
795              const Register& rn,
796              unsigned lsb,
797              unsigned width) {
798     VIXL_ASSERT(width >= 1);
799     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
800     sbfm(rd,
801          rn,
802          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
803          width - 1);
804   }
805 
806   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)807   void sbfx(const Register& rd,
808             const Register& rn,
809             unsigned lsb,
810             unsigned width) {
811     VIXL_ASSERT(width >= 1);
812     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
813     sbfm(rd, rn, lsb, lsb + width - 1);
814   }
815 
816   // Signed extend byte.
sxtb(const Register & rd,const Register & rn)817   void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
818 
819   // Signed extend halfword.
sxth(const Register & rd,const Register & rn)820   void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
821 
822   // Signed extend word.
sxtw(const Register & rd,const Register & rn)823   void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
824 
825   // Ubfm aliases.
826 
827   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)828   void lsl(const Register& rd, const Register& rn, unsigned shift) {
829     unsigned reg_size = rd.GetSizeInBits();
830     VIXL_ASSERT(shift < reg_size);
831     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
832   }
833 
834   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)835   void lsr(const Register& rd, const Register& rn, unsigned shift) {
836     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
837     ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
838   }
839 
840   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)841   void ubfiz(const Register& rd,
842              const Register& rn,
843              unsigned lsb,
844              unsigned width) {
845     VIXL_ASSERT(width >= 1);
846     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
847     ubfm(rd,
848          rn,
849          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
850          width - 1);
851   }
852 
853   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)854   void ubfx(const Register& rd,
855             const Register& rn,
856             unsigned lsb,
857             unsigned width) {
858     VIXL_ASSERT(width >= 1);
859     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
860     ubfm(rd, rn, lsb, lsb + width - 1);
861   }
862 
863   // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)864   void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
865 
866   // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)867   void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
868 
869   // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)870   void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
871 
872   // Extract (ror() is emitted through this, with rn and rm both set to rs).
873   void extr(const Register& rd,
874             const Register& rn,
875             const Register& rm,
876             unsigned lsb);
877 
878   // Conditional select: rd = cond ? rn : rm.
879   void csel(const Register& rd,
880             const Register& rn,
881             const Register& rm,
882             Condition cond);
883 
884   // Conditional select increment: rd = cond ? rn : (rm + 1).
885   void csinc(const Register& rd,
886              const Register& rn,
887              const Register& rm,
888              Condition cond);
889 
890   // Conditional select inversion: rd = cond ? rn : ~rm.
891   void csinv(const Register& rd,
892              const Register& rn,
893              const Register& rm,
894              Condition cond);
895 
896   // Conditional select negation: rd = cond ? rn : -rm.
897   void csneg(const Register& rd,
898              const Register& rn,
899              const Register& rm,
900              Condition cond);
901 
902   // Conditional set: rd = cond ? 1 : 0.
903   void cset(const Register& rd, Condition cond);
904 
905   // Conditional set mask: rd = cond ? -1 : 0.
906   void csetm(const Register& rd, Condition cond);
907 
908   // Conditional increment: rd = cond ? rn + 1 : rn.
909   void cinc(const Register& rd, const Register& rn, Condition cond);
910 
911   // Conditional invert: rd = cond ? ~rn : rn.
912   void cinv(const Register& rd, const Register& rn, Condition cond);
913 
914   // Conditional negate: rd = cond ? -rn : rn.
915   void cneg(const Register& rd, const Register& rn, Condition cond);
916 
917   // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)918   void ror(const Register& rd, const Register& rs, unsigned shift) {
919     extr(rd, rs, rs, shift);
920   }
921 
922   // Conditional comparison.
      // NOTE(review): nzcv is presumably the flag value applied when cond does
      // not hold (per the Arm CCMN/CCMP definitions) - confirm against the
      // implementation.
923 
924   // Conditional compare negative.
925   void ccmn(const Register& rn,
926             const Operand& operand,
927             StatusFlags nzcv,
928             Condition cond);
929 
930   // Conditional compare.
931   void ccmp(const Register& rn,
932             const Operand& operand,
933             StatusFlags nzcv,
934             Condition cond);
935 
936   // CRC-32 checksum from byte.
937   void crc32b(const Register& wd, const Register& wn, const Register& wm);
938 
939   // CRC-32 checksum from half-word.
940   void crc32h(const Register& wd, const Register& wn, const Register& wm);
941 
942   // CRC-32 checksum from word.
943   void crc32w(const Register& wd, const Register& wn, const Register& wm);
944 
945   // CRC-32 checksum from double word.
946   void crc32x(const Register& wd, const Register& wn, const Register& xm);
947 
948   // CRC-32C checksum from byte.
949   void crc32cb(const Register& wd, const Register& wn, const Register& wm);
950 
951   // CRC-32C checksum from half-word.
952   void crc32ch(const Register& wd, const Register& wn, const Register& wm);
953 
954   // CRC-32C checksum from word.
955   void crc32cw(const Register& wd, const Register& wn, const Register& wm);
956 
957   // CRC-32C checksum from double word.
958   void crc32cx(const Register& wd, const Register& wn, const Register& xm);
959 
960   // Multiply: rd = rn * rm.
961   void mul(const Register& rd, const Register& rn, const Register& rm);
962 
963   // Negated multiply: rd = -(rn * rm).
964   void mneg(const Register& rd, const Register& rn, const Register& rm);
965 
966   // Signed long multiply: 32 x 32 -> 64-bit.
967   void smull(const Register& xd, const Register& wn, const Register& wm);
968 
969   // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
970   void smulh(const Register& xd, const Register& xn, const Register& xm);
971 
972   // Multiply and accumulate: rd = ra + (rn * rm).
973   void madd(const Register& rd,
974             const Register& rn,
975             const Register& rm,
976             const Register& ra);
977 
978   // Multiply and subtract: rd = ra - (rn * rm).
979   void msub(const Register& rd,
980             const Register& rn,
981             const Register& rm,
982             const Register& ra);
983 
984   // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
985   void smaddl(const Register& xd,
986               const Register& wn,
987               const Register& wm,
988               const Register& xa);
989 
990   // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
991   void umaddl(const Register& xd,
992               const Register& wn,
993               const Register& wm,
994               const Register& xa);
995 
996   // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)997   void umull(const Register& xd, const Register& wn, const Register& wm) {
998     umaddl(xd, wn, wm, xzr);
999   }
1000 
1001   // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1002   void umulh(const Register& xd, const Register& xn, const Register& xm);
1003 
1004   // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1005   void smsubl(const Register& xd,
1006               const Register& wn,
1007               const Register& wm,
1008               const Register& xa);
1009 
1010   // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1011   void umsubl(const Register& xd,
1012               const Register& wn,
1013               const Register& wm,
1014               const Register& xa);
1015 
1016   // Signed integer divide: rd = rn / rm.
1017   void sdiv(const Register& rd, const Register& rn, const Register& rm);
1018 
1019   // Unsigned integer divide: rd = rn / rm.
1020   void udiv(const Register& rd, const Register& rn, const Register& rm);
1021 
1022   // Bit reverse.
1023   void rbit(const Register& rd, const Register& rn);
1024 
1025   // Reverse bytes in 16-bit half words.
1026   void rev16(const Register& rd, const Register& rn);
1027 
1028   // Reverse bytes in 32-bit words.
1029   void rev32(const Register& xd, const Register& xn);
1030 
1031   // Reverse bytes in 64-bit general purpose register, an alias for rev
1032   // [Armv8.2].
rev64(const Register & xd,const Register & xn)1033   void rev64(const Register& xd, const Register& xn) {
1034     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1035     rev(xd, xn);
1036   }
1037 
1038   // Reverse bytes (rev64 forwards to this for 64-bit registers).
1039   void rev(const Register& rd, const Register& rn);
1040 
1041   // Count leading zero bits.
1042   void clz(const Register& rd, const Register& rn);
1043 
1044   // Count leading sign bits.
1045   void cls(const Register& rd, const Register& rn);
1046 
1047   // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
       // NOTE(review): the second operand is named rn here but xn in the sibling
       // pacib/pacda/pacdb declarations - consider renaming for consistency.
1048   void pacia(const Register& xd, const Register& rn);
1049 
1050   // Pointer Authentication Code for Instruction address, using key A and a
1051   // modifier of zero [Armv8.3].
1052   void paciza(const Register& xd);
1053 
1054   // Pointer Authentication Code for Instruction address, using key A, with
1055   // address in x17 and modifier in x16 [Armv8.3].
1056   void pacia1716();
1057 
1058   // Pointer Authentication Code for Instruction address, using key A, with
1059   // address in LR and modifier in SP [Armv8.3].
1060   void paciasp();
1061 
1062   // Pointer Authentication Code for Instruction address, using key A, with
1063   // address in LR and a modifier of zero [Armv8.3].
1064   void paciaz();
1065 
1066   // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1067   void pacib(const Register& xd, const Register& xn);
1068 
1069   // Pointer Authentication Code for Instruction address, using key B and a
1070   // modifier of zero [Armv8.3].
1071   void pacizb(const Register& xd);
1072 
1073   // Pointer Authentication Code for Instruction address, using key B, with
1074   // address in x17 and modifier in x16 [Armv8.3].
1075   void pacib1716();
1076 
1077   // Pointer Authentication Code for Instruction address, using key B, with
1078   // address in LR and modifier in SP [Armv8.3].
1079   void pacibsp();
1080 
1081   // Pointer Authentication Code for Instruction address, using key B, with
1082   // address in LR and a modifier of zero [Armv8.3].
1083   void pacibz();
1084 
1085   // Pointer Authentication Code for Data address, using key A [Armv8.3].
1086   void pacda(const Register& xd, const Register& xn);
1087 
1088   // Pointer Authentication Code for Data address, using key A and a modifier of
1089   // zero [Armv8.3].
1090   void pacdza(const Register& xd);
1091 
1092   // Pointer Authentication Code for Data address, using key B [Armv8.3].
1093   void pacdb(const Register& xd, const Register& xn);
1094 
1095   // Pointer Authentication Code for Data address, using key B and a modifier of
1096   // zero [Armv8.3].
1097   void pacdzb(const Register& xd);
1098 
1099   // Pointer Authentication Code, using Generic key [Armv8.3].
1100   void pacga(const Register& xd, const Register& xn, const Register& xm);
1101 
1102   // Authenticate Instruction address, using key A [Armv8.3].
1103   void autia(const Register& xd, const Register& xn);
1104 
1105   // Authenticate Instruction address, using key A and a modifier of zero
1106   // [Armv8.3].
1107   void autiza(const Register& xd);
1108 
1109   // Authenticate Instruction address, using key A, with address in x17 and
1110   // modifier in x16 [Armv8.3].
1111   void autia1716();
1112 
1113   // Authenticate Instruction address, using key A, with address in LR and
1114   // modifier in SP [Armv8.3].
1115   void autiasp();
1116 
1117   // Authenticate Instruction address, using key A, with address in LR and a
1118   // modifier of zero [Armv8.3].
1119   void autiaz();
1120 
1121   // Authenticate Instruction address, using key B [Armv8.3].
1122   void autib(const Register& xd, const Register& xn);
1123 
1124   // Authenticate Instruction address, using key B and a modifier of zero
1125   // [Armv8.3].
1126   void autizb(const Register& xd);
1127 
1128   // Authenticate Instruction address, using key B, with address in x17 and
1129   // modifier in x16 [Armv8.3].
1130   void autib1716();
1131 
1132   // Authenticate Instruction address, using key B, with address in LR and
1133   // modifier in SP [Armv8.3].
1134   void autibsp();
1135 
1136   // Authenticate Instruction address, using key B, with address in LR and a
1137   // modifier of zero [Armv8.3].
1138   void autibz();
1139 
1140   // Authenticate Data address, using key A [Armv8.3].
1141   void autda(const Register& xd, const Register& xn);
1142 
1143   // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1144   void autdza(const Register& xd);
1145 
1146   // Authenticate Data address, using key B [Armv8.3].
1147   void autdb(const Register& xd, const Register& xn);
1148 
1149   // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1150   void autdzb(const Register& xd);
1151 
1152   // Strip Pointer Authentication Code of Data address [Armv8.3].
1153   void xpacd(const Register& xd);
1154 
1155   // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1156   void xpaci(const Register& xd);
1157 
1158   // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1159   void xpaclri();
1160 
1161   // Memory instructions.
1162 
1163   // Load integer or FP register.
1164   void ldr(const CPURegister& rt,
1165            const MemOperand& src,
1166            LoadStoreScalingOption option = PreferScaledOffset);
1167 
1168   // Store integer or FP register.
1169   void str(const CPURegister& rt,
1170            const MemOperand& dst,
1171            LoadStoreScalingOption option = PreferScaledOffset);
1172 
1173   // Load word with sign extension.
1174   void ldrsw(const Register& xt,
1175              const MemOperand& src,
1176              LoadStoreScalingOption option = PreferScaledOffset);
1177 
1178   // Load byte.
1179   void ldrb(const Register& rt,
1180             const MemOperand& src,
1181             LoadStoreScalingOption option = PreferScaledOffset);
1182 
1183   // Store byte.
1184   void strb(const Register& rt,
1185             const MemOperand& dst,
1186             LoadStoreScalingOption option = PreferScaledOffset);
1187 
1188   // Load byte with sign extension.
1189   void ldrsb(const Register& rt,
1190              const MemOperand& src,
1191              LoadStoreScalingOption option = PreferScaledOffset);
1192 
1193   // Load half-word.
1194   void ldrh(const Register& rt,
1195             const MemOperand& src,
1196             LoadStoreScalingOption option = PreferScaledOffset);
1197 
1198   // Store half-word.
1199   void strh(const Register& rt,
1200             const MemOperand& dst,
1201             LoadStoreScalingOption option = PreferScaledOffset);
1202 
1203   // Load half-word with sign extension.
1204   void ldrsh(const Register& rt,
1205              const MemOperand& src,
1206              LoadStoreScalingOption option = PreferScaledOffset);
1207 
1208   // Load integer or FP register (with unscaled offset).
1209   void ldur(const CPURegister& rt,
1210             const MemOperand& src,
1211             LoadStoreScalingOption option = PreferUnscaledOffset);
1212 
1213   // Store integer or FP register (with unscaled offset).
       // NOTE(review): the memory operand is named src although this is a store;
       // the other store declarations in this group call it dst.
1214   void stur(const CPURegister& rt,
1215             const MemOperand& src,
1216             LoadStoreScalingOption option = PreferUnscaledOffset);
1217 
1218   // Load word with sign extension (and unscaled offset).
1219   void ldursw(const Register& xt,
1220               const MemOperand& src,
1221               LoadStoreScalingOption option = PreferUnscaledOffset);
1222 
1223   // Load byte (with unscaled offset).
1224   void ldurb(const Register& rt,
1225              const MemOperand& src,
1226              LoadStoreScalingOption option = PreferUnscaledOffset);
1227 
1228   // Store byte (with unscaled offset).
1229   void sturb(const Register& rt,
1230              const MemOperand& dst,
1231              LoadStoreScalingOption option = PreferUnscaledOffset);
1232 
1233   // Load byte with sign extension (and unscaled offset).
1234   void ldursb(const Register& rt,
1235               const MemOperand& src,
1236               LoadStoreScalingOption option = PreferUnscaledOffset);
1237 
1238   // Load half-word (with unscaled offset).
1239   void ldurh(const Register& rt,
1240              const MemOperand& src,
1241              LoadStoreScalingOption option = PreferUnscaledOffset);
1242 
1243   // Store half-word (with unscaled offset).
1244   void sturh(const Register& rt,
1245              const MemOperand& dst,
1246              LoadStoreScalingOption option = PreferUnscaledOffset);
1247 
1248   // Load half-word with sign extension (and unscaled offset).
1249   void ldursh(const Register& rt,
1250               const MemOperand& src,
1251               LoadStoreScalingOption option = PreferUnscaledOffset);
1252 
1253   // Load double-word with pointer authentication, using data key A and a
1254   // modifier of zero [Armv8.3].
1255   void ldraa(const Register& xt, const MemOperand& src);
1256 
1257   // Load double-word with pointer authentication, using data key B and a
1258   // modifier of zero [Armv8.3].
1259   void ldrab(const Register& xt, const MemOperand& src);
1260 
1261   // Load integer or FP register pair.
1262   void ldp(const CPURegister& rt,
1263            const CPURegister& rt2,
1264            const MemOperand& src);
1265 
1266   // Store integer or FP register pair.
1267   void stp(const CPURegister& rt,
1268            const CPURegister& rt2,
1269            const MemOperand& dst);
1270 
1271   // Load word pair with sign extension.
1272   void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1273 
1274   // Load integer or FP register pair, non-temporal.
1275   void ldnp(const CPURegister& rt,
1276             const CPURegister& rt2,
1277             const MemOperand& src);
1278 
1279   // Store integer or FP register pair, non-temporal.
1280   void stnp(const CPURegister& rt,
1281             const CPURegister& rt2,
1282             const MemOperand& dst);
1283 
1284   // Load integer or FP register from literal pool.
1285   void ldr(const CPURegister& rt, RawLiteral* literal);
1286 
1287   // Load word with sign extension from literal pool.
1288   void ldrsw(const Register& xt, RawLiteral* literal);
1289 
1290   // Load integer or FP register from pc + (imm19 << 2).
1291   void ldr(const CPURegister& rt, int64_t imm19);
1292 
1293   // Load word with sign extension from pc + (imm19 << 2).
1294   void ldrsw(const Register& xt, int64_t imm19);
1295 
1296   // Store exclusive byte.
1297   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1298 
1299   // Store exclusive half-word.
1300   void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1301 
1302   // Store exclusive register.
1303   void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1304 
1305   // Load exclusive byte.
1306   void ldxrb(const Register& rt, const MemOperand& src);
1307 
1308   // Load exclusive half-word.
1309   void ldxrh(const Register& rt, const MemOperand& src);
1310 
1311   // Load exclusive register.
1312   void ldxr(const Register& rt, const MemOperand& src);
1313 
1314   // Store exclusive register pair.
1315   void stxp(const Register& rs,
1316             const Register& rt,
1317             const Register& rt2,
1318             const MemOperand& dst);
1319 
1320   // Load exclusive register pair.
1321   void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1322 
1323   // Store-release exclusive byte.
1324   void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1325 
1326   // Store-release exclusive half-word.
1327   void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1328 
1329   // Store-release exclusive register.
1330   void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1331 
1332   // Load-acquire exclusive byte.
1333   void ldaxrb(const Register& rt, const MemOperand& src);
1334 
1335   // Load-acquire exclusive half-word.
1336   void ldaxrh(const Register& rt, const MemOperand& src);
1337 
1338   // Load-acquire exclusive register.
1339   void ldaxr(const Register& rt, const MemOperand& src);
1340 
1341   // Store-release exclusive register pair.
1342   void stlxp(const Register& rs,
1343              const Register& rt,
1344              const Register& rt2,
1345              const MemOperand& dst);
1346 
1347   // Load-acquire exclusive register pair.
1348   void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1349 
1350   // Store-release byte.
1351   void stlrb(const Register& rt, const MemOperand& dst);
1352 
1353   // Store-release half-word.
1354   void stlrh(const Register& rt, const MemOperand& dst);
1355 
1356   // Store-release register.
1357   void stlr(const Register& rt, const MemOperand& dst);
1358 
1359   // Load-acquire byte.
1360   void ldarb(const Register& rt, const MemOperand& src);
1361 
1362   // Load-acquire half-word.
1363   void ldarh(const Register& rt, const MemOperand& src);
1364 
1365   // Load-acquire register.
1366   void ldar(const Register& rt, const MemOperand& src);
1367 
1368   // Store LORelease byte [Armv8.1].
1369   void stllrb(const Register& rt, const MemOperand& dst);
1370 
1371   // Store LORelease half-word [Armv8.1].
1372   void stllrh(const Register& rt, const MemOperand& dst);
1373 
1374   // Store LORelease register [Armv8.1].
1375   void stllr(const Register& rt, const MemOperand& dst);
1376 
1377   // Load LOAcquire byte [Armv8.1].
1378   void ldlarb(const Register& rt, const MemOperand& src);
1379 
1380   // Load LOAcquire half-word [Armv8.1].
1381   void ldlarh(const Register& rt, const MemOperand& src);
1382 
1383   // Load LOAcquire register [Armv8.1].
1384   void ldlar(const Register& rt, const MemOperand& src);
1385 
1386   // Compare and Swap word or doubleword in memory [Armv8.1].
1387   void cas(const Register& rs, const Register& rt, const MemOperand& src);
1388 
1389   // Compare and Swap word or doubleword in memory, with Load-acquire semantics
       // [Armv8.1].
1390   void casa(const Register& rs, const Register& rt, const MemOperand& src);
1391 
1392   // Compare and Swap word or doubleword in memory, with Store-release semantics
       // [Armv8.1].
1393   void casl(const Register& rs, const Register& rt, const MemOperand& src);
1394 
1395   // Compare and Swap word or doubleword in memory, with Load-acquire and
       // Store-release semantics [Armv8.1].
1396   void casal(const Register& rs, const Register& rt, const MemOperand& src);
1397 
1398   // Compare and Swap byte in memory [Armv8.1].
1399   void casb(const Register& rs, const Register& rt, const MemOperand& src);
1400 
1401   // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1].
1402   void casab(const Register& rs, const Register& rt, const MemOperand& src);
1403 
1404   // Compare and Swap byte in memory, with Store-release semantics [Armv8.1].
1405   void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1406 
1407   // Compare and Swap byte in memory, with Load-acquire and Store-release
       // semantics [Armv8.1].
1408   void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1409 
1410   // Compare and Swap halfword in memory [Armv8.1].
1411   void cash(const Register& rs, const Register& rt, const MemOperand& src);
1412 
1413   // Compare and Swap halfword in memory, with Load-acquire semantics
       // [Armv8.1].
1414   void casah(const Register& rs, const Register& rt, const MemOperand& src);
1415 
1416   // Compare and Swap halfword in memory, with Store-release semantics
       // [Armv8.1].
1417   void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1418 
1419   // Compare and Swap halfword in memory, with Load-acquire and Store-release
       // semantics [Armv8.1].
1420   void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1421 
1422   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1423   void casp(const Register& rs,
1424             const Register& rs2,
1425             const Register& rt,
1426             const Register& rt2,
1427             const MemOperand& src);
1428 
1429   // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
       // semantics [Armv8.1].
1430   void caspa(const Register& rs,
1431              const Register& rs2,
1432              const Register& rt,
1433              const Register& rt2,
1434              const MemOperand& src);
1435 
1436   // Compare and Swap Pair of words or doublewords in memory, with Store-release
       // semantics [Armv8.1].
1437   void caspl(const Register& rs,
1438              const Register& rs2,
1439              const Register& rt,
1440              const Register& rt2,
1441              const MemOperand& src);
1442 
1443   // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
       // and Store-release semantics [Armv8.1].
1444   void caspal(const Register& rs,
1445               const Register& rs2,
1446               const Register& rt,
1447               const Register& rt2,
1448               const MemOperand& src);
1449 
       // Store-release and Load-acquire RCpc with unscaled offset [Armv8.4].

1450   // Store-release byte (with unscaled offset) [Armv8.4].
1451   void stlurb(const Register& rt, const MemOperand& dst);
1452 
1453   // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1454   void ldapurb(const Register& rt, const MemOperand& src);
1455 
1456   // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1457   void ldapursb(const Register& rt, const MemOperand& src);
1458 
1459   // Store-release half-word (with unscaled offset) [Armv8.4].
1460   void stlurh(const Register& rt, const MemOperand& dst);
1461 
1462   // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1463   void ldapurh(const Register& rt, const MemOperand& src);
1464 
1465   // Load-acquire RCpc Register signed half-word (with unscaled offset)
1466   // [Armv8.4].
1467   void ldapursh(const Register& rt, const MemOperand& src);
1468 
1469   // Store-release word or double-word (with unscaled offset) [Armv8.4].
1470   void stlur(const Register& rt, const MemOperand& dst);
1471 
1472   // Load-acquire RCpc Register word or double-word (with unscaled offset)
1473   // [Armv8.4].
1474   void ldapur(const Register& rt, const MemOperand& src);
1475 
1476   // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1477   void ldapursw(const Register& xt, const MemOperand& src);
1478 
1479   // Atomic add on byte in memory [Armv8.1].
1480   void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1481 
1482   // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1].
1483   void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1484 
1485   // Atomic add on byte in memory, with Store-release semantics [Armv8.1].
1486   void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1487 
1488   // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1489   // [Armv8.1].
1490   void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1491 
1492   // Atomic add on halfword in memory [Armv8.1].
1493   void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1494 
1495   // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1].
1496   void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1497 
1498   // Atomic add on halfword in memory, with Store-release semantics [Armv8.1].
1499   void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1500 
1501   // Atomic add on halfword in memory, with Load-acquire and Store-release
1502   // semantics [Armv8.1].
1503   void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1504 
1505   // Atomic add on word or doubleword in memory [Armv8.1].
1506   void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1507 
1508   // Atomic add on word or doubleword in memory, with Load-acquire semantics
1509   // [Armv8.1].
1510   void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1511 
1512   // Atomic add on word or doubleword in memory, with Store-release semantics
1513   // [Armv8.1].
1514   void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1515 
1516   // Atomic add on word or doubleword in memory, with Load-acquire and
1517   // Store-release semantics [Armv8.1].
1518   void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1519 
1520   // Atomic bit clear on byte in memory [Armv8.1].
1521   void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1522 
1523   // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1].
1524   void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1525 
1526   // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1].
1527   void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1528 
1529   // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1530   // semantics [Armv8.1].
1531   void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1532 
1533   // Atomic bit clear on halfword in memory [Armv8.1].
1534   void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1535 
1536   // Atomic bit clear on halfword in memory, with Load-acquire semantics
1537   // [Armv8.1].
1538   void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1539 
1540   // Atomic bit clear on halfword in memory, with Store-release semantics
1541   // [Armv8.1].
1542   void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1543 
1544   // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1545   // semantics [Armv8.1].
1546   void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1547 
1548   // Atomic bit clear on word or doubleword in memory [Armv8.1].
1549   void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1550 
1551   // Atomic bit clear on word or doubleword in memory, with Load-acquire
1552   // semantics [Armv8.1].
1553   void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1554 
1555   // Atomic bit clear on word or doubleword in memory, with Store-release
1556   // semantics [Armv8.1].
1557   void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1558 
1559   // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1560   // Store-release semantics [Armv8.1].
1561   void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1562 
1563   // Atomic exclusive OR on byte in memory [Armv8.1].
1564   void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1565 
1566   // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1567   // [Armv8.1].
1568   void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1569 
1570   // Atomic exclusive OR on byte in memory, with Store-release semantics
1571   // [Armv8.1].
1572   void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1573 
1574   // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1575   // semantics [Armv8.1].
1576   void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1577 
1578   // Atomic exclusive OR on halfword in memory [Armv8.1].
1579   void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1580 
1581   // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1582   // [Armv8.1].
1583   void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1584 
1585   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1586   // [Armv8.1].
1587   void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1588 
1589   // Atomic exclusive OR on halfword in memory, with Load-acquire and
1590   // Store-release semantics [Armv8.1].
1591   void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1592 
1593   // Atomic exclusive OR on word or doubleword in memory [Armv8.1].
1594   void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1595 
1596   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1597   // semantics [Armv8.1].
1598   void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1599 
1600   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1601   // semantics [Armv8.1].
1602   void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1603 
1604   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1605   // Store-release semantics [Armv8.1].
1606   void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1607 
1608   // Atomic bit set on byte in memory [Armv8.1]
1609   void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1610 
1611   // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1612   void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1613 
1614   // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1615   void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1616 
1617   // Atomic bit set on byte in memory, with Load-acquire and Store-release
1618   // semantics [Armv8.1]
1619   void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1620 
1621   // Atomic bit set on halfword in memory [Armv8.1]
1622   void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1623 
1624   // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1625   void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1626 
1627   // Atomic bit set on halfword in memory, with Store-release semantics
1628   // [Armv8.1]
1629   void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1630 
1631   // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1632   // semantics [Armv8.1]
1633   void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1634 
1635   // Atomic bit set on word or doubleword in memory [Armv8.1]
1636   void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1637 
1638   // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1639   // [Armv8.1]
1640   void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1641 
1642   // Atomic bit set on word or doubleword in memory, with Store-release
1643   // semantics [Armv8.1]
1644   void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1645 
1646   // Atomic bit set on word or doubleword in memory, with Load-acquire and
1647   // Store-release semantics [Armv8.1]
1648   void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1649 
1650   // Atomic signed maximum on byte in memory [Armv8.1]
1651   void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1652 
1653   // Atomic signed maximum on byte in memory, with Load-acquire semantics
1654   // [Armv8.1]
1655   void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1656 
1657   // Atomic signed maximum on byte in memory, with Store-release semantics
1658   // [Armv8.1]
1659   void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1660 
1661   // Atomic signed maximum on byte in memory, with Load-acquire and
1662   // Store-release semantics [Armv8.1]
1663   void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1664 
1665   // Atomic signed maximum on halfword in memory [Armv8.1]
1666   void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1667 
1668   // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1669   // [Armv8.1]
1670   void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1671 
1672   // Atomic signed maximum on halfword in memory, with Store-release semantics
1673   // [Armv8.1]
1674   void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1675 
1676   // Atomic signed maximum on halfword in memory, with Load-acquire and
1677   // Store-release semantics [Armv8.1]
1678   void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1679 
1680   // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1681   void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1682 
1683   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1684   // semantics [Armv8.1]
1685   void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1686 
1687   // Atomic signed maximum on word or doubleword in memory, with Store-release
1688   // semantics [Armv8.1]
1689   void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1690 
1691   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1692   // and Store-release semantics [Armv8.1]
1693   void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1694 
1695   // Atomic signed minimum on byte in memory [Armv8.1]
1696   void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1697 
1698   // Atomic signed minimum on byte in memory, with Load-acquire semantics
1699   // [Armv8.1]
1700   void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1701 
1702   // Atomic signed minimum on byte in memory, with Store-release semantics
1703   // [Armv8.1]
1704   void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1705 
1706   // Atomic signed minimum on byte in memory, with Load-acquire and
1707   // Store-release semantics [Armv8.1]
1708   void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1709 
1710   // Atomic signed minimum on halfword in memory [Armv8.1]
1711   void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1712 
1713   // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1714   // [Armv8.1]
1715   void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1716 
1717   // Atomic signed minimum on halfword in memory, with Store-release semantics
1718   // [Armv8.1]
1719   void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1720 
1721   // Atomic signed minimum on halfword in memory, with Load-acquire and
1722   // Store-release semantics [Armv8.1]
1723   void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1724 
1725   // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1726   void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1727 
1728   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1729   // semantics [Armv8.1]
1730   void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1731 
1732   // Atomic signed minimum on word or doubleword in memory, with Store-release
1733   // semantics [Armv8.1]
1734   void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1735 
1736   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1737   // and Store-release semantics [Armv8.1]
1738   void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1739 
1740   // Atomic unsigned maximum on byte in memory [Armv8.1]
1741   void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1742 
1743   // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1744   // [Armv8.1]
1745   void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1746 
1747   // Atomic unsigned maximum on byte in memory, with Store-release semantics
1748   // [Armv8.1]
1749   void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1750 
1751   // Atomic unsigned maximum on byte in memory, with Load-acquire and
1752   // Store-release semantics [Armv8.1]
1753   void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1754 
1755   // Atomic unsigned maximum on halfword in memory [Armv8.1]
1756   void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1757 
1758   // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1759   // [Armv8.1]
1760   void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1761 
1762   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1763   // [Armv8.1]
1764   void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1765 
1766   // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1767   // Store-release semantics [Armv8.1]
1768   void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1769 
1770   // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1771   void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1772 
1773   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1774   // semantics [Armv8.1]
1775   void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1776 
1777   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1778   // semantics [Armv8.1]
1779   void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1780 
1781   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1782   // and Store-release semantics [Armv8.1]
1783   void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1784 
1785   // Atomic unsigned minimum on byte in memory [Armv8.1]
1786   void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1787 
1788   // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1789   // [Armv8.1]
1790   void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1791 
1792   // Atomic unsigned minimum on byte in memory, with Store-release semantics
1793   // [Armv8.1]
1794   void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1795 
1796   // Atomic unsigned minimum on byte in memory, with Load-acquire and
1797   // Store-release semantics [Armv8.1]
1798   void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1799 
1800   // Atomic unsigned minimum on halfword in memory [Armv8.1]
1801   void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1802 
1803   // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1804   // [Armv8.1]
1805   void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1806 
1807   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1808   // [Armv8.1]
1809   void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1810 
1811   // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1812   // Store-release semantics [Armv8.1]
1813   void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1814 
1815   // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1816   void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1817 
1818   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1819   // semantics [Armv8.1]
1820   void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1821 
1822   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1823   // semantics [Armv8.1]
1824   void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1825 
1826   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1827   // and Store-release semantics [Armv8.1]
1828   void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1829 
1830   // Atomic add on byte in memory, without return. [Armv8.1]
1831   void staddb(const Register& rs, const MemOperand& src);
1832 
1833   // Atomic add on byte in memory, with Store-release semantics and without
1834   // return. [Armv8.1]
1835   void staddlb(const Register& rs, const MemOperand& src);
1836 
1837   // Atomic add on halfword in memory, without return. [Armv8.1]
1838   void staddh(const Register& rs, const MemOperand& src);
1839 
1840   // Atomic add on halfword in memory, with Store-release semantics and without
1841   // return. [Armv8.1]
1842   void staddlh(const Register& rs, const MemOperand& src);
1843 
1844   // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1845   void stadd(const Register& rs, const MemOperand& src);
1846 
1847   // Atomic add on word or doubleword in memory, with Store-release semantics
1848   // and without return. [Armv8.1]
1849   void staddl(const Register& rs, const MemOperand& src);
1850 
1851   // Atomic bit clear on byte in memory, without return. [Armv8.1]
1852   void stclrb(const Register& rs, const MemOperand& src);
1853 
1854   // Atomic bit clear on byte in memory, with Store-release semantics and
1855   // without return. [Armv8.1]
1856   void stclrlb(const Register& rs, const MemOperand& src);
1857 
1858   // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1859   void stclrh(const Register& rs, const MemOperand& src);
1860 
1861   // Atomic bit clear on halfword in memory, with Store-release semantics and
1862   // without return. [Armv8.1]
1863   void stclrlh(const Register& rs, const MemOperand& src);
1864 
1865   // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1866   void stclr(const Register& rs, const MemOperand& src);
1867 
1868   // Atomic bit clear on word or doubleword in memory, with Store-release
1869   // semantics and without return. [Armv8.1]
1870   void stclrl(const Register& rs, const MemOperand& src);
1871 
1872   // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1873   void steorb(const Register& rs, const MemOperand& src);
1874 
1875   // Atomic exclusive OR on byte in memory, with Store-release semantics and
1876   // without return. [Armv8.1]
1877   void steorlb(const Register& rs, const MemOperand& src);
1878 
1879   // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1880   void steorh(const Register& rs, const MemOperand& src);
1881 
1882   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1883   // and without return. [Armv8.1]
1884   void steorlh(const Register& rs, const MemOperand& src);
1885 
1886   // Atomic exclusive OR on word or doubleword in memory, without return.
1887   // [Armv8.1]
1888   void steor(const Register& rs, const MemOperand& src);
1889 
1890   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1891   // semantics and without return. [Armv8.1]
1892   void steorl(const Register& rs, const MemOperand& src);
1893 
1894   // Atomic bit set on byte in memory, without return. [Armv8.1]
1895   void stsetb(const Register& rs, const MemOperand& src);
1896 
1897   // Atomic bit set on byte in memory, with Store-release semantics and without
1898   // return. [Armv8.1]
1899   void stsetlb(const Register& rs, const MemOperand& src);
1900 
1901   // Atomic bit set on halfword in memory, without return. [Armv8.1]
1902   void stseth(const Register& rs, const MemOperand& src);
1903 
1904   // Atomic bit set on halfword in memory, with Store-release semantics and
1905   // without return. [Armv8.1]
1906   void stsetlh(const Register& rs, const MemOperand& src);
1907 
1908   // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1909   void stset(const Register& rs, const MemOperand& src);
1910 
1911   // Atomic bit set on word or doubleword in memory, with Store-release
1912   // semantics and without return. [Armv8.1]
1913   void stsetl(const Register& rs, const MemOperand& src);
1914 
1915   // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1916   void stsmaxb(const Register& rs, const MemOperand& src);
1917 
1918   // Atomic signed maximum on byte in memory, with Store-release semantics and
1919   // without return. [Armv8.1]
1920   void stsmaxlb(const Register& rs, const MemOperand& src);
1921 
1922   // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1923   void stsmaxh(const Register& rs, const MemOperand& src);
1924 
1925   // Atomic signed maximum on halfword in memory, with Store-release semantics
1926   // and without return. [Armv8.1]
1927   void stsmaxlh(const Register& rs, const MemOperand& src);
1928 
1929   // Atomic signed maximum on word or doubleword in memory, without return.
1930   // [Armv8.1]
1931   void stsmax(const Register& rs, const MemOperand& src);
1932 
1933   // Atomic signed maximum on word or doubleword in memory, with Store-release
1934   // semantics and without return. [Armv8.1]
1935   void stsmaxl(const Register& rs, const MemOperand& src);
1936 
1937   // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1938   void stsminb(const Register& rs, const MemOperand& src);
1939 
1940   // Atomic signed minimum on byte in memory, with Store-release semantics and
1941   // without return. [Armv8.1]
1942   void stsminlb(const Register& rs, const MemOperand& src);
1943 
1944   // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1945   void stsminh(const Register& rs, const MemOperand& src);
1946 
1947   // Atomic signed minimum on halfword in memory, with Store-release semantics
1948   // and without return. [Armv8.1]
1949   void stsminlh(const Register& rs, const MemOperand& src);
1950 
1951   // Atomic signed minimum on word or doubleword in memory, without return.
1952   // [Armv8.1]
1953   void stsmin(const Register& rs, const MemOperand& src);
1954 
1955   // Atomic signed minimum on word or doubleword in memory, with Store-release
1956   // semantics and without return. [Armv8.1]
1957   void stsminl(const Register& rs, const MemOperand& src);
1958 
1959   // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1960   void stumaxb(const Register& rs, const MemOperand& src);
1961 
1962   // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1963   // without return. [Armv8.1]
1964   void stumaxlb(const Register& rs, const MemOperand& src);
1965 
1966   // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1967   void stumaxh(const Register& rs, const MemOperand& src);
1968 
1969   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1970   // and without return. [Armv8.1]
1971   void stumaxlh(const Register& rs, const MemOperand& src);
1972 
1973   // Atomic unsigned maximum on word or doubleword in memory, without return.
1974   // [Armv8.1]
1975   void stumax(const Register& rs, const MemOperand& src);
1976 
1977   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1978   // semantics and without return. [Armv8.1]
1979   void stumaxl(const Register& rs, const MemOperand& src);
1980 
1981   // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
1982   void stuminb(const Register& rs, const MemOperand& src);
1983 
1984   // Atomic unsigned minimum on byte in memory, with Store-release semantics and
1985   // without return. [Armv8.1]
1986   void stuminlb(const Register& rs, const MemOperand& src);
1987 
1988   // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
1989   void stuminh(const Register& rs, const MemOperand& src);
1990 
1991   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1992   // and without return. [Armv8.1]
1993   void stuminlh(const Register& rs, const MemOperand& src);
1994 
1995   // Atomic unsigned minimum on word or doubleword in memory, without return.
1996   // [Armv8.1]
1997   void stumin(const Register& rs, const MemOperand& src);
1998 
1999   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2000   // semantics and without return. [Armv8.1]
2001   void stuminl(const Register& rs, const MemOperand& src);
2002 
2003   // Swap byte in memory [Armv8.1]
2004   void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2005 
2006   // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2007   void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2008 
2009   // Swap byte in memory, with Store-release semantics [Armv8.1]
2010   void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2011 
2012   // Swap byte in memory, with Load-acquire and Store-release semantics
2013   // [Armv8.1]
2014   void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2015 
2016   // Swap halfword in memory [Armv8.1]
2017   void swph(const Register& rs, const Register& rt, const MemOperand& src);
2018 
2019   // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2020   void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2021 
2022   // Swap halfword in memory, with Store-release semantics [Armv8.1]
2023   void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2024 
2025   // Swap halfword in memory, with Load-acquire and Store-release semantics
2026   // [Armv8.1]
2027   void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2028 
2029   // Swap word or doubleword in memory [Armv8.1]
2030   void swp(const Register& rs, const Register& rt, const MemOperand& src);
2031 
2032   // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2033   void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2034 
2035   // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2036   void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2037 
2038   // Swap word or doubleword in memory, with Load-acquire and Store-release
2039   // semantics [Armv8.1]
2040   void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2041 
2042   // Load-Acquire RCpc Register byte [Armv8.3]
2043   void ldaprb(const Register& rt, const MemOperand& src);
2044 
2045   // Load-Acquire RCpc Register halfword [Armv8.3]
2046   void ldaprh(const Register& rt, const MemOperand& src);
2047 
2048   // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2049   void ldapr(const Register& rt, const MemOperand& src);
2050 
2051   // Prefetch memory.
2052   void prfm(PrefetchOperation op,
2053             const MemOperand& addr,
2054             LoadStoreScalingOption option = PreferScaledOffset);
2055 
2056   // Prefetch memory (with unscaled offset).
2057   void prfum(PrefetchOperation op,
2058              const MemOperand& addr,
2059              LoadStoreScalingOption option = PreferUnscaledOffset);
2060 
2061   // Prefetch memory in the literal pool.
2062   void prfm(PrefetchOperation op, RawLiteral* literal);
2063 
2064   // Prefetch from pc + imm19 << 2.
2065   void prfm(PrefetchOperation op, int64_t imm19);
2066 
2067   // Prefetch memory (allowing unallocated hints).
2068   void prfm(int op,
2069             const MemOperand& addr,
2070             LoadStoreScalingOption option = PreferScaledOffset);
2071 
2072   // Prefetch memory (with unscaled offset, allowing unallocated hints).
2073   void prfum(int op,
2074              const MemOperand& addr,
2075              LoadStoreScalingOption option = PreferUnscaledOffset);
2076 
2077   // Prefetch memory in the literal pool (allowing unallocated hints).
2078   void prfm(int op, RawLiteral* literal);
2079 
2080   // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2081   void prfm(int op, int64_t imm19);
2082 
2083   // Move instructions. The default shift of -1 indicates that the move
2084   // instruction will calculate an appropriate 16-bit immediate and left shift
2085   // that is equal to the 64-bit immediate argument. If an explicit left shift
2086   // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2087   //
2088   // For movk, an explicit shift can be used to indicate which half word should
2089   // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
2090   // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2091   // most-significant.
2092 
2093   // Move immediate and keep.
2094   void movk(const Register& rd, uint64_t imm, int shift = -1) {
2095     MoveWide(rd, imm, shift, MOVK);
2096   }
2097 
2098   // Move inverted immediate.
2099   void movn(const Register& rd, uint64_t imm, int shift = -1) {
2100     MoveWide(rd, imm, shift, MOVN);
2101   }
2102 
2103   // Move immediate.
2104   void movz(const Register& rd, uint64_t imm, int shift = -1) {
2105     MoveWide(rd, imm, shift, MOVZ);
2106   }
2107 
2108   // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2109   void mov(const Register& rd, uint64_t imm) {
2110     if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2111       VIXL_UNIMPLEMENTED();
2112     }
2113   }
2114 
2115   // Misc instructions.
2116 
2117   // Monitor debug-mode breakpoint.
2118   void brk(int code);
2119 
2120   // Halting debug-mode breakpoint.
2121   void hlt(int code);
2122 
2123   // Generate exception targeting EL1.
2124   void svc(int code);
2125 
2126   // Generate undefined instruction exception.
2127   void udf(int code);
2128 
2129   // Move register to register.
2130   void mov(const Register& rd, const Register& rn);
2131 
2132   // Move inverted operand to register.
2133   void mvn(const Register& rd, const Operand& operand);
2134 
2135   // System instructions.
2136 
2137   // Move to register from system register.
2138   void mrs(const Register& xt, SystemRegister sysreg);
2139 
2140   // Move from register to system register.
2141   void msr(SystemRegister sysreg, const Register& xt);
2142 
2143   // Invert carry flag [Armv8.4].
2144   void cfinv();
2145 
2146   // Convert floating-point condition flags from alternative format to Arm
2147   // format [Armv8.5].
2148   void xaflag();
2149 
2150   // Convert floating-point condition flags from Arm format to alternative
2151   // format [Armv8.5].
2152   void axflag();
2153 
2154   // System instruction.
2155   void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2156 
2157   // System instruction with pre-encoded op (op1:crn:crm:op2).
2158   void sys(int op, const Register& xt = xzr);
2159 
2160   // System data cache operation.
2161   void dc(DataCacheOp op, const Register& rt);
2162 
2163   // System instruction cache operation.
2164   void ic(InstructionCacheOp op, const Register& rt);
2165 
2166   // System hint (named type).
2167   void hint(SystemHint code);
2168 
2169   // System hint (numbered type).
2170   void hint(int imm7);
2171 
2172   // Clear exclusive monitor.
2173   void clrex(int imm4 = 0xf);
2174 
2175   // Data memory barrier.
2176   void dmb(BarrierDomain domain, BarrierType type);
2177 
2178   // Data synchronization barrier.
2179   void dsb(BarrierDomain domain, BarrierType type);
2180 
2181   // Instruction synchronization barrier.
2182   void isb();
2183 
2184   // Error synchronization barrier.
2185   void esb();
2186 
2187   // Conditional speculation dependency barrier.
2188   void csdb();
2189 
2190   // No-op.
nop()2191   void nop() { hint(NOP); }
2192 
2193   // Branch target identification.
2194   void bti(BranchTargetIdentifier id);
2195 
2196   // FP and NEON instructions.
2197 
2198   // Move double precision immediate to FP register.
2199   void fmov(const VRegister& vd, double imm);
2200 
2201   // Move single precision immediate to FP register.
2202   void fmov(const VRegister& vd, float imm);
2203 
2204   // Move half precision immediate to FP register [Armv8.2].
2205   void fmov(const VRegister& vd, Float16 imm);
2206 
2207   // Move FP register to register.
2208   void fmov(const Register& rd, const VRegister& fn);
2209 
2210   // Move register to FP register.
2211   void fmov(const VRegister& vd, const Register& rn);
2212 
2213   // Move FP register to FP register.
2214   void fmov(const VRegister& vd, const VRegister& fn);
2215 
2216   // Move 64-bit register to top half of 128-bit FP register.
2217   void fmov(const VRegister& vd, int index, const Register& rn);
2218 
2219   // Move top half of 128-bit FP register to 64-bit register.
2220   void fmov(const Register& rd, const VRegister& vn, int index);
2221 
2222   // FP add.
2223   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2224 
2225   // FP subtract.
2226   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2227 
2228   // FP multiply.
2229   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2230 
2231   // FP fused multiply-add.
2232   void fmadd(const VRegister& vd,
2233              const VRegister& vn,
2234              const VRegister& vm,
2235              const VRegister& va);
2236 
2237   // FP fused multiply-subtract.
2238   void fmsub(const VRegister& vd,
2239              const VRegister& vn,
2240              const VRegister& vm,
2241              const VRegister& va);
2242 
2243   // FP fused multiply-add and negate.
2244   void fnmadd(const VRegister& vd,
2245               const VRegister& vn,
2246               const VRegister& vm,
2247               const VRegister& va);
2248 
2249   // FP fused multiply-subtract and negate.
2250   void fnmsub(const VRegister& vd,
2251               const VRegister& vn,
2252               const VRegister& vm,
2253               const VRegister& va);
2254 
2255   // FP multiply-negate scalar.
2256   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2257 
2258   // FP reciprocal exponent scalar.
2259   void frecpx(const VRegister& vd, const VRegister& vn);
2260 
2261   // FP divide.
2262   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2263 
2264   // FP maximum.
2265   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2266 
2267   // FP minimum.
2268   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2269 
2270   // FP maximum number.
2271   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2272 
2273   // FP minimum number.
2274   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2275 
2276   // FP absolute.
2277   void fabs(const VRegister& vd, const VRegister& vn);
2278 
2279   // FP negate.
2280   void fneg(const VRegister& vd, const VRegister& vn);
2281 
2282   // FP square root.
2283   void fsqrt(const VRegister& vd, const VRegister& vn);
2284 
  // FP round to integer, nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding.
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding.
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
  void frint32x(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, towards zero [Armv8.5].
  void frint32z(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
  void frint64x(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, towards zero [Armv8.5].
  void frint64z(const VRegister& vd, const VRegister& vn);
2317 
  // Helper taking a register, an immediate `value` and an FPTrapFlags selector.
  // NOTE(review): presumably the shared implementation behind the immediate
  // forms of fcmp and fcmpe, with `trap` choosing between them - confirm
  // against the definition.
  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  // Two-register overload of the helper above.
  // NOTE(review): presumably shared by the register forms of fcmp and fcmpe -
  // confirm against the definition.
  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  // Helper for FP conditional compares: takes the same operands as fccmp and
  // fccmpe plus an FPTrapFlags selector.
  // NOTE(review): presumably `trap` chooses between fccmp and fccmpe - confirm
  // against the definition.
  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);

  // FP conditional compare.
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);
2353 
  // FP conditional select.
  // NOTE(review): presumably `vd` receives `vn` when `cond` holds and `vm`
  // otherwise - confirm against the definition.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);
2359 
  // Common FP Convert functions.
  // The two NEONFPConvertToInt overloads differ in the type of the first
  // operand (Register `rd` or VRegister `vd`); NEONFP16ConvertToInt has the
  // same parameter list as the VRegister overload.
  // NOTE(review): `op` is presumably the opcode to encode, and the FP16 helper
  // presumably covers half-precision operands - confirm.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2364 
  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);
2385 
  // FP convert to signed integer, nearest with ties to away.
  // Overload whose first operand is a Register (`rd`).
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  // Overload whose first operand is a Register (`rd`).
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtas(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtau(const VRegister& vd, const VRegister& vn);
2397 
  // FP convert to signed integer, round towards -infinity.
  // Overload whose first operand is a Register (`rd`).
  void fcvtms(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  // Overload whose first operand is a Register (`rd`).
  void fcvtmu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtms(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtmu(const VRegister& vd, const VRegister& vn);
2409 
  // FP convert to signed integer, nearest with ties to even.
  // Overload whose first operand is a Register (`rd`).
  void fcvtns(const Register& rd, const VRegister& vn);

  // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
  void fjcvtzs(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to even.
  // Overload whose first operand is a Register (`rd`).
  void fcvtnu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to even.
  // Overload whose first operand is a VRegister.
  // NOTE(review): that operand is named `rd` here, whereas the VRegister
  // overloads of the other fcvt* conversions name it `vd`.
  void fcvtns(const VRegister& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to even.
  // Overload whose first operand is a VRegister (also named `rd`; see the
  // note on the VRegister overload of fcvtns).
  void fcvtnu(const VRegister& rd, const VRegister& vn);
2424 
  // FP convert to signed integer or fixed-point, round towards zero.
  // Overload whose first operand is a Register (`rd`); `fbits` defaults to 0.
  // NOTE(review): `fbits` is presumably the number of fractional bits of the
  // fixed-point result, with 0 selecting the plain integer conversion -
  // confirm against the definition.
  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  // Overload whose first operand is a Register (`rd`); `fbits` defaults to 0.
  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer or fixed-point, round towards zero.
  // Overload whose first operand is a VRegister (`vd`); `fbits` defaults to 0.
  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  // Overload whose first operand is a VRegister (`vd`); `fbits` defaults to 0.
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2436 
  // FP convert to signed integer, round towards +infinity.
  // Overload whose first operand is a Register (`rd`).
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  // Overload whose first operand is a Register (`rd`).
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  // Overload whose first operand is a VRegister (`vd`).
  void fcvtpu(const VRegister& vd, const VRegister& vn);
2448 
  // Convert signed integer or fixed-point to FP.
  // Overload whose second operand is a Register (`rn`); `fbits` defaults to 0.
  // NOTE(review): the first operand is named `fd` in all four scvtf/ucvtf
  // declarations, whereas neighbouring declarations use `vd`; `fbits` is
  // presumably the number of fractional bits of the fixed-point input -
  // confirm against the definition.
  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  // Overload whose second operand is a Register (`rn`); `fbits` defaults to 0.
  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);

  // Convert signed integer or fixed-point to FP.
  // Overload whose second operand is a VRegister (`vn`); `fbits` defaults to
  // 0.
  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);

  // Convert unsigned integer or fixed-point to FP.
  // Overload whose second operand is a VRegister (`vn`); `fbits` defaults to
  // 0.
  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2460 
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  // NOTE(review): the accumulator is presumably `vd` itself (no separate
  // accumulator operand is declared) - confirm against the definition.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  // NOTE(review): as for uaba, the accumulator is presumably `vd` - confirm.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2472 
  // Add (all three operands are VRegisters).
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract (all three operands are VRegisters).
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2478 
  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving subtract.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving subtract.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2508 
  // Add pairwise (three-operand form).
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements scalar (two-operand form).
  void addp(const VRegister& vd, const VRegister& vn);
2514 
  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  // NOTE(review): in this and the other by-element overloads `vm_index` is
  // presumably the index of the element of `vm` used as the scalar operand -
  // confirm against the definition.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);
2541 
  // Signed long multiply-add by scalar element.
  // NOTE(review): `vm_index` presumably selects the element of `vm` that is
  // used as the scalar operand - confirm against the definition.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);
2565 
  // Signed long multiply-subtract by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-subtract by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-subtract by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-subtract by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);
2589 
  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating doubling long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2625 
  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-subtract by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-subtract by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2649 
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise equal to zero.
  // NOTE(review): for this and the following four compare-with-zero forms the
  // `value` argument is presumably required to be 0 - confirm against the
  // definition.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);
2682 
  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  // (An immediate-shift overload of sqshl is also declared in this class.)
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  // (An immediate-shift overload of uqshl is also declared in this class.)
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2706 
  // Bitwise and.
  // NOTE(review): the trailing underscore is presumably there because `and` is
  // a reserved alternative token in C++.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate. `left_shift` defaults to 0.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register (both operands are VRegisters).
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn.
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor.
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate. `left_shift` defaults to 0.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2742 
  // Vector move immediate. `shift` defaults to LSL and `shift_amount` to 0.
  // Note that `imm` is a uint64_t here, unlike the int `imm8` taken by mvni.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate. `shift` defaults to LSL and `shift_amount`
  // to 0.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);
2757 
  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  // NOTE(review): same description and parameter list as mvn, so presumably an
  // equivalent spelling; the trailing underscore is presumably needed because
  // `not` is a reserved alternative token in C++ - confirm.
  void not_(const VRegister& vd, const VRegister& vn);
2778 
  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  // NOTE(review): here and in the other "2" variants below, "second part"
  // presumably means the upper half of the destination is written - confirm
  // against the definition.
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);
2802 
  // Extract vector from pair of vectors.
  // NOTE(review): `index` is presumably the position within the `vn`:`vm` pair
  // at which extraction starts - confirm units and range against the
  // definition.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);
2808 
  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  // NOTE(review): each mov overload in this group has the same parameter list
  // as the declaration immediately preceding it (dup, ins, ins and umov
  // respectively), so they are presumably aliases - confirm against the
  // definitions.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2844 
  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  // NOTE(review): `lane` is presumably the index of the element of `vt` that
  // is loaded - confirm against the definition.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);
2869 
  // Two-element structure load to two registers.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load to three registers.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load to four registers.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2922 
  // Count leading sign bits (vector).
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);
2943 
  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  // NOTE(review): the accumulator is presumably `vd` itself (no separate
  // accumulator operand is declared) - confirm against the definition.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  // NOTE(review): as for sadalp, the accumulator is presumably `vd` - confirm.
  void uadalp(const VRegister& vd, const VRegister& vn);
2961 
  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  // (A by-register overload of sqshl is also declared in this class.)
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  // (A by-register overload of uqshl is also declared in this class.)
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);
2973 
  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  // NOTE(review): given the description, `shift` is presumably required to
  // equal the source element size in bits - confirm against the definition.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  // NOTE(review): the same presumed constraint on `shift` as for shll applies.
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);
3009 
  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3048 
  // One-element structure store from one register.
  // NOTE(review): the memory operand of every st1 overload is named `src`
  // although these are stores; presumably it is the address stored to -
  // confirm against the definition.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);
3070 
  // Two-element structure store from two registers.
  // NOTE(review): the memory operand of the st2, st3 and st4 overloads is
  // named `src` although these are stores; presumably it is the address stored
  // to - confirm against the definition.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);
3107 
  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3155 
3156   // Unsigned maximum.
3157   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3158 
3159   // Unsigned pairwise maximum.
3160   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3161 
3162   // Unsigned maximum across vector.
3163   void umaxv(const VRegister& vd, const VRegister& vn);
3164 
3165   // Unsigned minimum.
3166   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3167 
3168   // Unsigned pairwise minimum.
3169   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3170 
3171   // Unsigned minimum across vector.
3172   void uminv(const VRegister& vd, const VRegister& vn);
3173 
3174   // Transpose vectors (primary).
3175   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3176 
3177   // Transpose vectors (secondary).
3178   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3179 
3180   // Unzip vectors (primary).
3181   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3182 
3183   // Unzip vectors (secondary).
3184   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3185 
3186   // Zip vectors (primary).
3187   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3188 
3189   // Zip vectors (secondary).
3190   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3191 
3192   // Signed shift right by immediate.
3193   void sshr(const VRegister& vd, const VRegister& vn, int shift);
3194 
3195   // Unsigned shift right by immediate.
3196   void ushr(const VRegister& vd, const VRegister& vn, int shift);
3197 
3198   // Signed rounding shift right by immediate.
3199   void srshr(const VRegister& vd, const VRegister& vn, int shift);
3200 
3201   // Unsigned rounding shift right by immediate.
3202   void urshr(const VRegister& vd, const VRegister& vn, int shift);
3203 
3204   // Signed shift right by immediate and accumulate.
3205   void ssra(const VRegister& vd, const VRegister& vn, int shift);
3206 
3207   // Unsigned shift right by immediate and accumulate.
3208   void usra(const VRegister& vd, const VRegister& vn, int shift);
3209 
3210   // Signed rounding shift right by immediate and accumulate.
3211   void srsra(const VRegister& vd, const VRegister& vn, int shift);
3212 
3213   // Unsigned rounding shift right by immediate and accumulate.
3214   void ursra(const VRegister& vd, const VRegister& vn, int shift);
3215 
3216   // Shift right narrow by immediate.
3217   void shrn(const VRegister& vd, const VRegister& vn, int shift);
3218 
3219   // Shift right narrow by immediate (second part).
3220   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
3221 
3222   // Rounding shift right narrow by immediate.
3223   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
3224 
3225   // Rounding shift right narrow by immediate (second part).
3226   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
3227 
3228   // Unsigned saturating shift right narrow by immediate.
3229   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
3230 
3231   // Unsigned saturating shift right narrow by immediate (second part).
3232   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3233 
3234   // Unsigned saturating rounding shift right narrow by immediate.
3235   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3236 
3237   // Unsigned saturating rounding shift right narrow by immediate (second part).
3238   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3239 
3240   // Signed saturating shift right narrow by immediate.
3241   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
3242 
3243   // Signed saturating shift right narrow by immediate (second part).
3244   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3245 
  // Signed saturating rounding shift right narrow by immediate.
3247   void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3248 
  // Signed saturating rounding shift right narrow by immediate (second part).
3250   void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3251 
3252   // Signed saturating shift right unsigned narrow by immediate.
3253   void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
3254 
3255   // Signed saturating shift right unsigned narrow by immediate (second part).
3256   void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
3257 
  // Signed sat rounding shift right unsigned narrow by immediate.
3259   void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
3260 
  // Signed sat rounding shift right unsigned narrow by immediate (second part).
3262   void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3263 
3264   // FP reciprocal step.
3265   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3266 
3267   // FP reciprocal estimate.
3268   void frecpe(const VRegister& vd, const VRegister& vn);
3269 
3270   // FP reciprocal square root estimate.
3271   void frsqrte(const VRegister& vd, const VRegister& vn);
3272 
3273   // FP reciprocal square root step.
3274   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3275 
3276   // Signed absolute difference and accumulate long.
3277   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3278 
3279   // Signed absolute difference and accumulate long (second part).
3280   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3281 
3282   // Unsigned absolute difference and accumulate long.
3283   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3284 
3285   // Unsigned absolute difference and accumulate long (second part).
3286   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3287 
3288   // Signed absolute difference long.
3289   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3290 
3291   // Signed absolute difference long (second part).
3292   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3293 
3294   // Unsigned absolute difference long.
3295   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3296 
3297   // Unsigned absolute difference long (second part).
3298   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3299 
3300   // Polynomial multiply long.
3301   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3302 
3303   // Polynomial multiply long (second part).
3304   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3305 
3306   // Signed long multiply-add.
3307   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3308 
3309   // Signed long multiply-add (second part).
3310   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3311 
3312   // Unsigned long multiply-add.
3313   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3314 
3315   // Unsigned long multiply-add (second part).
3316   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3317 
3318   // Signed long multiply-sub.
3319   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3320 
3321   // Signed long multiply-sub (second part).
3322   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3323 
3324   // Unsigned long multiply-sub.
3325   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3326 
3327   // Unsigned long multiply-sub (second part).
3328   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3329 
3330   // Signed long multiply.
3331   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3332 
3333   // Signed long multiply (second part).
3334   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3335 
3336   // Signed saturating doubling long multiply-add.
3337   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3338 
3339   // Signed saturating doubling long multiply-add (second part).
3340   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3341 
3342   // Signed saturating doubling long multiply-subtract.
3343   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3344 
3345   // Signed saturating doubling long multiply-subtract (second part).
3346   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3347 
3348   // Signed saturating doubling long multiply.
3349   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3350 
3351   // Signed saturating doubling long multiply (second part).
3352   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3353 
3354   // Signed saturating doubling multiply returning high half.
3355   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3356 
3357   // Signed saturating rounding doubling multiply returning high half.
3358   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3359 
3360   // Signed dot product [Armv8.2].
3361   void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3362 
3363   // Signed saturating rounding doubling multiply accumulate returning high
3364   // half [Armv8.1].
3365   void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3366 
3367   // Unsigned dot product [Armv8.2].
3368   void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3369 
  // Dot product with unsigned and signed integers (vector).
3371   void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3372 
3373   // Dot product with signed and unsigned integers (vector, by element).
3374   void sudot(const VRegister& vd,
3375              const VRegister& vn,
3376              const VRegister& vm,
3377              int vm_index);
3378 
3379   // Dot product with unsigned and signed integers (vector, by element).
3380   void usdot(const VRegister& vd,
3381              const VRegister& vn,
3382              const VRegister& vm,
3383              int vm_index);
3384 
3385   // Signed saturating rounding doubling multiply subtract returning high half
3386   // [Armv8.1].
3387   void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3388 
3389   // Signed saturating doubling multiply element returning high half.
3390   void sqdmulh(const VRegister& vd,
3391                const VRegister& vn,
3392                const VRegister& vm,
3393                int vm_index);
3394 
3395   // Signed saturating rounding doubling multiply element returning high half.
3396   void sqrdmulh(const VRegister& vd,
3397                 const VRegister& vn,
3398                 const VRegister& vm,
3399                 int vm_index);
3400 
3401   // Signed dot product by element [Armv8.2].
3402   void sdot(const VRegister& vd,
3403             const VRegister& vn,
3404             const VRegister& vm,
3405             int vm_index);
3406 
3407   // Signed saturating rounding doubling multiply accumulate element returning
3408   // high half [Armv8.1].
3409   void sqrdmlah(const VRegister& vd,
3410                 const VRegister& vn,
3411                 const VRegister& vm,
3412                 int vm_index);
3413 
3414   // Unsigned dot product by element [Armv8.2].
3415   void udot(const VRegister& vd,
3416             const VRegister& vn,
3417             const VRegister& vm,
3418             int vm_index);
3419 
3420   // Signed saturating rounding doubling multiply subtract element returning
3421   // high half [Armv8.1].
3422   void sqrdmlsh(const VRegister& vd,
3423                 const VRegister& vn,
3424                 const VRegister& vm,
3425                 int vm_index);
3426 
  // Unsigned long multiply.
3428   void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3429 
3430   // Unsigned long multiply (second part).
3431   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3432 
3433   // Add narrow returning high half.
3434   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3435 
3436   // Add narrow returning high half (second part).
3437   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3438 
3439   // Rounding add narrow returning high half.
3440   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3441 
3442   // Rounding add narrow returning high half (second part).
3443   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3444 
3445   // Subtract narrow returning high half.
3446   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3447 
3448   // Subtract narrow returning high half (second part).
3449   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3450 
3451   // Rounding subtract narrow returning high half.
3452   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3453 
3454   // Rounding subtract narrow returning high half (second part).
3455   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3456 
3457   // FP vector multiply accumulate.
3458   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3459 
3460   // FP fused multiply-add long to accumulator.
3461   void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3462 
3463   // FP fused multiply-add long to accumulator (second part).
3464   void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3465 
3466   // FP fused multiply-add long to accumulator by element.
3467   void fmlal(const VRegister& vd,
3468              const VRegister& vn,
3469              const VRegister& vm,
3470              int vm_index);
3471 
3472   // FP fused multiply-add long to accumulator by element (second part).
3473   void fmlal2(const VRegister& vd,
3474               const VRegister& vn,
3475               const VRegister& vm,
3476               int vm_index);
3477 
3478   // FP vector multiply subtract.
3479   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3480 
3481   // FP fused multiply-subtract long to accumulator.
3482   void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3483 
3484   // FP fused multiply-subtract long to accumulator (second part).
3485   void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3486 
3487   // FP fused multiply-subtract long to accumulator by element.
3488   void fmlsl(const VRegister& vd,
3489              const VRegister& vn,
3490              const VRegister& vm,
3491              int vm_index);
3492 
3493   // FP fused multiply-subtract long to accumulator by element (second part).
3494   void fmlsl2(const VRegister& vd,
3495               const VRegister& vn,
3496               const VRegister& vm,
3497               int vm_index);
3498 
3499   // FP vector multiply extended.
3500   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3501 
3502   // FP absolute greater than or equal.
3503   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3504 
3505   // FP absolute greater than.
3506   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3507 
3508   // FP multiply by element.
3509   void fmul(const VRegister& vd,
3510             const VRegister& vn,
3511             const VRegister& vm,
3512             int vm_index);
3513 
3514   // FP fused multiply-add to accumulator by element.
3515   void fmla(const VRegister& vd,
3516             const VRegister& vn,
3517             const VRegister& vm,
3518             int vm_index);
3519 
3520   // FP fused multiply-sub from accumulator by element.
3521   void fmls(const VRegister& vd,
3522             const VRegister& vn,
3523             const VRegister& vm,
3524             int vm_index);
3525 
3526   // FP multiply extended by element.
3527   void fmulx(const VRegister& vd,
3528              const VRegister& vn,
3529              const VRegister& vm,
3530              int vm_index);
3531 
3532   // FP compare equal.
3533   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3534 
3535   // FP greater than.
3536   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3537 
3538   // FP greater than or equal.
3539   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3540 
  // FP compare equal to zero.
  // NOTE(review): presumably `imm` must be 0.0; confirm in the implementation.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  // NOTE(review): presumably `imm` must be 0.0; confirm in the implementation.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  // NOTE(review): presumably `imm` must be 0.0; confirm in the implementation.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  // NOTE(review): presumably `imm` must be 0.0; confirm in the implementation.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  // NOTE(review): presumably `imm` must be 0.0; confirm in the implementation.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3555 
3556   // FP absolute difference.
3557   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3558 
3559   // FP pairwise add vector.
3560   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3561 
3562   // FP pairwise add scalar.
3563   void faddp(const VRegister& vd, const VRegister& vn);
3564 
3565   // FP pairwise maximum vector.
3566   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3567 
3568   // FP pairwise maximum scalar.
3569   void fmaxp(const VRegister& vd, const VRegister& vn);
3570 
3571   // FP pairwise minimum vector.
3572   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3573 
3574   // FP pairwise minimum scalar.
3575   void fminp(const VRegister& vd, const VRegister& vn);
3576 
3577   // FP pairwise maximum number vector.
3578   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3579 
3580   // FP pairwise maximum number scalar.
3581   void fmaxnmp(const VRegister& vd, const VRegister& vn);
3582 
3583   // FP pairwise minimum number vector.
3584   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3585 
3586   // FP pairwise minimum number scalar.
3587   void fminnmp(const VRegister& vd, const VRegister& vn);
3588 
  // Armv8.3 complex numbers - note that these are only partial/helper
  // functions and must be used in series in order to perform full complex
  // number operations.
3591 
  // FP complex multiply accumulate (by element) [Armv8.3].
  // `vm_index` selects the element of `vm` that is used.
  // NOTE(review): `rot` is presumably the rotation in degrees (a multiple of
  // 90); confirm the accepted values in the implementation.
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index,
             int rot);

  // FP complex multiply accumulate [Armv8.3].
  // NOTE(review): `rot` is presumably the rotation in degrees (a multiple of
  // 90); confirm the accepted values in the implementation.
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);

  // FP complex add [Armv8.3].
  // NOTE(review): `rot` is presumably the rotation in degrees; confirm the
  // accepted values in the implementation.
  void fcadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);
3610 
3611   // Signed 8-bit integer matrix multiply-accumulate (vector).
3612   void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3613 
3614   // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3615   void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3616 
3617   // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3618   void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3619 
3620   // Scalable Vector Extensions.
3621 
3622   // Absolute value (predicated).
3623   void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3624 
3625   // Add vectors (predicated).
3626   void add(const ZRegister& zd,
3627            const PRegisterM& pg,
3628            const ZRegister& zn,
3629            const ZRegister& zm);
3630 
3631   // Add vectors (unpredicated).
3632   void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3633 
  // Add immediate (unpredicated).
  // `shift` defaults to -1.
  // NOTE(review): presumably -1 asks the assembler to choose the shift applied
  // to `imm8` itself; confirm in the implementation.
  void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3636 
3637   // Add multiple of predicate register size to scalar register.
3638   void addpl(const Register& xd, const Register& xn, int imm6);
3639 
3640   // Add multiple of vector register size to scalar register.
3641   void addvl(const Register& xd, const Register& xn, int imm6);
3642 
3643   // Compute vector address.
3644   void adr(const ZRegister& zd, const SVEMemOperand& addr);
3645 
3646   // Bitwise AND predicates.
3647   void and_(const PRegisterWithLaneSize& pd,
3648             const PRegisterZ& pg,
3649             const PRegisterWithLaneSize& pn,
3650             const PRegisterWithLaneSize& pm);
3651 
3652   // Bitwise AND vectors (predicated).
3653   void and_(const ZRegister& zd,
3654             const PRegisterM& pg,
3655             const ZRegister& zn,
3656             const ZRegister& zm);
3657 
3658   // Bitwise AND with immediate (unpredicated).
3659   void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3660 
3661   // Bitwise AND vectors (unpredicated).
3662   void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3663 
  // Bitwise AND predicates, setting the condition flags.
3665   void ands(const PRegisterWithLaneSize& pd,
3666             const PRegisterZ& pg,
3667             const PRegisterWithLaneSize& pn,
3668             const PRegisterWithLaneSize& pm);
3669 
3670   // Bitwise AND reduction to scalar.
3671   void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3672 
3673   // Arithmetic shift right by immediate (predicated).
3674   void asr(const ZRegister& zd,
3675            const PRegisterM& pg,
3676            const ZRegister& zn,
3677            int shift);
3678 
3679   // Arithmetic shift right by 64-bit wide elements (predicated).
3680   void asr(const ZRegister& zd,
3681            const PRegisterM& pg,
3682            const ZRegister& zn,
3683            const ZRegister& zm);
3684 
3685   // Arithmetic shift right by immediate (unpredicated).
3686   void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3687 
3688   // Arithmetic shift right by 64-bit wide elements (unpredicated).
3689   void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3690 
3691   // Arithmetic shift right for divide by immediate (predicated).
3692   void asrd(const ZRegister& zd,
3693             const PRegisterM& pg,
3694             const ZRegister& zn,
3695             int shift);
3696 
3697   // Reversed arithmetic shift right by vector (predicated).
3698   void asrr(const ZRegister& zd,
3699             const PRegisterM& pg,
3700             const ZRegister& zn,
3701             const ZRegister& zm);
3702 
3703   // Bitwise clear predicates.
3704   void bic(const PRegisterWithLaneSize& pd,
3705            const PRegisterZ& pg,
3706            const PRegisterWithLaneSize& pn,
3707            const PRegisterWithLaneSize& pm);
3708 
3709   // Bitwise clear vectors (predicated).
3710   void bic(const ZRegister& zd,
3711            const PRegisterM& pg,
3712            const ZRegister& zn,
3713            const ZRegister& zm);
3714 
3715   // Bitwise clear bits using immediate (unpredicated).
3716   void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3717 
3718   // Bitwise clear vectors (unpredicated).
3719   void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3720 
  // Bitwise clear predicates, setting the condition flags.
3722   void bics(const PRegisterWithLaneSize& pd,
3723             const PRegisterZ& pg,
3724             const PRegisterWithLaneSize& pn,
3725             const PRegisterWithLaneSize& pm);
3726 
3727   // Break after first true condition.
3728   void brka(const PRegisterWithLaneSize& pd,
3729             const PRegister& pg,
3730             const PRegisterWithLaneSize& pn);
3731 
  // Break after first true condition, setting the condition flags.
3733   void brkas(const PRegisterWithLaneSize& pd,
3734              const PRegisterZ& pg,
3735              const PRegisterWithLaneSize& pn);
3736 
3737   // Break before first true condition.
3738   void brkb(const PRegisterWithLaneSize& pd,
3739             const PRegister& pg,
3740             const PRegisterWithLaneSize& pn);
3741 
  // Break before first true condition, setting the condition flags.
3743   void brkbs(const PRegisterWithLaneSize& pd,
3744              const PRegisterZ& pg,
3745              const PRegisterWithLaneSize& pn);
3746 
3747   // Propagate break to next partition.
3748   void brkn(const PRegisterWithLaneSize& pd,
3749             const PRegisterZ& pg,
3750             const PRegisterWithLaneSize& pn,
3751             const PRegisterWithLaneSize& pm);
3752 
  // Propagate break to next partition, setting the condition flags.
3754   void brkns(const PRegisterWithLaneSize& pd,
3755              const PRegisterZ& pg,
3756              const PRegisterWithLaneSize& pn,
3757              const PRegisterWithLaneSize& pm);
3758 
3759   // Break after first true condition, propagating from previous partition.
3760   void brkpa(const PRegisterWithLaneSize& pd,
3761              const PRegisterZ& pg,
3762              const PRegisterWithLaneSize& pn,
3763              const PRegisterWithLaneSize& pm);
3764 
  // Break after first true condition, propagating from previous partition,
  // and setting the condition flags.
3766   void brkpas(const PRegisterWithLaneSize& pd,
3767               const PRegisterZ& pg,
3768               const PRegisterWithLaneSize& pn,
3769               const PRegisterWithLaneSize& pm);
3770 
3771   // Break before first true condition, propagating from previous partition.
3772   void brkpb(const PRegisterWithLaneSize& pd,
3773              const PRegisterZ& pg,
3774              const PRegisterWithLaneSize& pn,
3775              const PRegisterWithLaneSize& pm);
3776 
  // Break before first true condition, propagating from previous partition,
  // and setting the condition flags.
3778   void brkpbs(const PRegisterWithLaneSize& pd,
3779               const PRegisterZ& pg,
3780               const PRegisterWithLaneSize& pn,
3781               const PRegisterWithLaneSize& pm);
3782 
3783   // Conditionally extract element after last to general-purpose register.
3784   void clasta(const Register& rd,
3785               const PRegister& pg,
3786               const Register& rn,
3787               const ZRegister& zm);
3788 
3789   // Conditionally extract element after last to SIMD&FP scalar register.
3790   void clasta(const VRegister& vd,
3791               const PRegister& pg,
3792               const VRegister& vn,
3793               const ZRegister& zm);
3794 
3795   // Conditionally extract element after last to vector register.
3796   void clasta(const ZRegister& zd,
3797               const PRegister& pg,
3798               const ZRegister& zn,
3799               const ZRegister& zm);
3800 
3801   // Conditionally extract last element to general-purpose register.
3802   void clastb(const Register& rd,
3803               const PRegister& pg,
3804               const Register& rn,
3805               const ZRegister& zm);
3806 
3807   // Conditionally extract last element to SIMD&FP scalar register.
3808   void clastb(const VRegister& vd,
3809               const PRegister& pg,
3810               const VRegister& vn,
3811               const ZRegister& zm);
3812 
3813   // Conditionally extract last element to vector register.
3814   void clastb(const ZRegister& zd,
3815               const PRegister& pg,
3816               const ZRegister& zn,
3817               const ZRegister& zm);
3818 
3819   // Count leading sign bits (predicated).
3820   void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3821 
3822   // Count leading zero bits (predicated).
3823   void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3824 
  // Compare vectors (predicated), with the comparison given by `cond`.
  // NOTE(review): presumably this dispatches to the matching cmp<cc> form
  // declared below (cmpeq, cmpge, ...); confirm in the implementation, including
  // which `Condition` values are accepted.
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);
3830 
3831   // Compare vector to 64-bit wide elements.
3832   void cmpeq(const PRegisterWithLaneSize& pd,
3833              const PRegisterZ& pg,
3834              const ZRegister& zn,
3835              const ZRegister& zm);
3836 
3837   // Compare vector to immediate.
3838   void cmpeq(const PRegisterWithLaneSize& pd,
3839              const PRegisterZ& pg,
3840              const ZRegister& zn,
3841              int imm5);
3842 
3843   // Compare vector to 64-bit wide elements.
3844   void cmpge(const PRegisterWithLaneSize& pd,
3845              const PRegisterZ& pg,
3846              const ZRegister& zn,
3847              const ZRegister& zm);
3848 
3849   // Compare vector to immediate.
3850   void cmpge(const PRegisterWithLaneSize& pd,
3851              const PRegisterZ& pg,
3852              const ZRegister& zn,
3853              int imm5);
3854 
3855   // Compare vector to 64-bit wide elements.
3856   void cmpgt(const PRegisterWithLaneSize& pd,
3857              const PRegisterZ& pg,
3858              const ZRegister& zn,
3859              const ZRegister& zm);
3860 
3861   // Compare vector to immediate.
3862   void cmpgt(const PRegisterWithLaneSize& pd,
3863              const PRegisterZ& pg,
3864              const ZRegister& zn,
3865              int imm5);
3866 
3867   // Compare vector to 64-bit wide elements.
3868   void cmphi(const PRegisterWithLaneSize& pd,
3869              const PRegisterZ& pg,
3870              const ZRegister& zn,
3871              const ZRegister& zm);
3872 
3873   // Compare vector to immediate.
3874   void cmphi(const PRegisterWithLaneSize& pd,
3875              const PRegisterZ& pg,
3876              const ZRegister& zn,
3877              unsigned imm7);
3878 
3879   // Compare vector to 64-bit wide elements.
3880   void cmphs(const PRegisterWithLaneSize& pd,
3881              const PRegisterZ& pg,
3882              const ZRegister& zn,
3883              const ZRegister& zm);
3884 
3885   // Compare vector to immediate.
3886   void cmphs(const PRegisterWithLaneSize& pd,
3887              const PRegisterZ& pg,
3888              const ZRegister& zn,
3889              unsigned imm7);
3890 
3891   // Compare vector to 64-bit wide elements.
3892   void cmple(const PRegisterWithLaneSize& pd,
3893              const PRegisterZ& pg,
3894              const ZRegister& zn,
3895              const ZRegister& zm);
3896 
3897   // Compare vector to immediate.
3898   void cmple(const PRegisterWithLaneSize& pd,
3899              const PRegisterZ& pg,
3900              const ZRegister& zn,
3901              int imm5);
3902 
3903   // Compare vector to 64-bit wide elements.
3904   void cmplo(const PRegisterWithLaneSize& pd,
3905              const PRegisterZ& pg,
3906              const ZRegister& zn,
3907              const ZRegister& zm);
3908 
3909   // Compare vector to immediate.
3910   void cmplo(const PRegisterWithLaneSize& pd,
3911              const PRegisterZ& pg,
3912              const ZRegister& zn,
3913              unsigned imm7);
3914 
3915   // Compare vector to 64-bit wide elements.
3916   void cmpls(const PRegisterWithLaneSize& pd,
3917              const PRegisterZ& pg,
3918              const ZRegister& zn,
3919              const ZRegister& zm);
3920 
3921   // Compare vector to immediate.
3922   void cmpls(const PRegisterWithLaneSize& pd,
3923              const PRegisterZ& pg,
3924              const ZRegister& zn,
3925              unsigned imm7);
3926 
3927   // Compare vector to 64-bit wide elements.
3928   void cmplt(const PRegisterWithLaneSize& pd,
3929              const PRegisterZ& pg,
3930              const ZRegister& zn,
3931              const ZRegister& zm);
3932 
3933   // Compare vector to immediate.
3934   void cmplt(const PRegisterWithLaneSize& pd,
3935              const PRegisterZ& pg,
3936              const ZRegister& zn,
3937              int imm5);
3938 
3939   // Compare vector to 64-bit wide elements.
3940   void cmpne(const PRegisterWithLaneSize& pd,
3941              const PRegisterZ& pg,
3942              const ZRegister& zn,
3943              const ZRegister& zm);
3944 
3945   // Compare vector to immediate.
3946   void cmpne(const PRegisterWithLaneSize& pd,
3947              const PRegisterZ& pg,
3948              const ZRegister& zn,
3949              int imm5);
3950 
3951   // Logically invert boolean condition in vector (predicated).
3952   void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3953 
3954   // Count non-zero bits (predicated).
3955   void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3956 
3957   // Set scalar to multiple of predicate constraint element count.
3958   void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3959 
3960   // Set scalar to multiple of predicate constraint element count.
3961   void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3962 
3963   // Set scalar to multiple of predicate constraint element count.
3964   void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3965 
3966   // Set scalar to active predicate element count.
3967   void cntp(const Register& xd,
3968             const PRegister& pg,
3969             const PRegisterWithLaneSize& pn);
3970 
3971   // Set scalar to multiple of predicate constraint element count.
3972   void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3973 
3974   // Shuffle active elements of vector to the right and fill with zero.
3975   void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
3976 
3977   // Copy signed integer immediate to vector elements (predicated).
3978   void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
3979 
3980   // Copy general-purpose register to vector elements (predicated).
3981   void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
3982 
3983   // Copy SIMD&FP scalar register to vector elements (predicated).
3984   void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
3985 
3986   // Compare and terminate loop (termination condition: rn == rm).
3987   void ctermeq(const Register& rn, const Register& rm);
3988 
3989   // Compare and terminate loop (termination condition: rn != rm).
3990   void ctermne(const Register& rn, const Register& rm);
3991 
3992   // Decrement scalar by multiple of predicate constraint element count.
3993   void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3994 
3995   // Decrement scalar by multiple of predicate constraint element count.
3996   void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
3997 
3998   // Decrement vector by multiple of predicate constraint element count.
3999   void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4000 
4001   // Decrement scalar by multiple of predicate constraint element count.
4002   void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4003 
4004   // Decrement vector by multiple of predicate constraint element count.
4005   void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4006 
4007   // Decrement scalar by active predicate element count.
4008   void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4009 
4010   // Decrement vector by active predicate element count.
4011   void decp(const ZRegister& zdn, const PRegister& pg);
4012 
4013   // Decrement scalar by multiple of predicate constraint element count.
4014   void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4015 
4016   // Decrement vector by multiple of predicate constraint element count.
4017   void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4018 
4019   // Broadcast general-purpose register to vector elements (unpredicated).
4020   void dup(const ZRegister& zd, const Register& xn);
4021 
4022   // Broadcast indexed element to vector (unpredicated).
4023   void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4024 
4025   // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4026   // assembler will pick an appropriate immediate and left shift that is
4027   // equivalent to the immediate argument. If an explicit left shift is
4028   // specified (0 or 8), the immediate must be a signed 8-bit integer.
4029 
4030   // Broadcast signed immediate to vector elements (unpredicated).
4031   void dup(const ZRegister& zd, int imm8, int shift = -1);
4032 
4033   // Broadcast logical bitmask immediate to vector (unpredicated).
4034   void dupm(const ZRegister& zd, uint64_t imm);
4035 
4036   // Bitwise exclusive OR with inverted immediate (unpredicated).
4037   void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4038 
4039   // Bitwise exclusive OR predicates.
4040   void eor(const PRegisterWithLaneSize& pd,
4041            const PRegisterZ& pg,
4042            const PRegisterWithLaneSize& pn,
4043            const PRegisterWithLaneSize& pm);
4044 
4045   // Bitwise exclusive OR vectors (predicated).
4046   void eor(const ZRegister& zd,
4047            const PRegisterM& pg,
4048            const ZRegister& zn,
4049            const ZRegister& zm);
4050 
4051   // Bitwise exclusive OR with immediate (unpredicated).
4052   void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4053 
4054   // Bitwise exclusive OR vectors (unpredicated).
4055   void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4056 
4057   // Bitwise exclusive OR predicates, setting the condition flags.
4058   void eors(const PRegisterWithLaneSize& pd,
4059             const PRegisterZ& pg,
4060             const PRegisterWithLaneSize& pn,
4061             const PRegisterWithLaneSize& pm);
4062 
4063   // Bitwise XOR reduction to scalar.
4064   void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4065 
4066   // Extract vector from pair of vectors.
4067   void ext(const ZRegister& zd,
4068            const ZRegister& zn,
4069            const ZRegister& zm,
4070            unsigned offset);
4071 
4072   // Floating-point absolute difference (predicated).
4073   void fabd(const ZRegister& zd,
4074             const PRegisterM& pg,
4075             const ZRegister& zn,
4076             const ZRegister& zm);
4077 
4078   // Floating-point absolute value (predicated).
4079   void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4080 
4081   // Floating-point absolute compare vectors.
4082   void facge(const PRegisterWithLaneSize& pd,
4083              const PRegisterZ& pg,
4084              const ZRegister& zn,
4085              const ZRegister& zm);
4086 
4087   // Floating-point absolute compare vectors.
4088   void facgt(const PRegisterWithLaneSize& pd,
4089              const PRegisterZ& pg,
4090              const ZRegister& zn,
4091              const ZRegister& zm);
4092 
4093   // Floating-point add immediate (predicated).
4094   void fadd(const ZRegister& zd,
4095             const PRegisterM& pg,
4096             const ZRegister& zn,
4097             double imm);
4098 
4099   // Floating-point add vector (predicated).
4100   void fadd(const ZRegister& zd,
4101             const PRegisterM& pg,
4102             const ZRegister& zn,
4103             const ZRegister& zm);
4104 
4105   // Floating-point add vector (unpredicated).
4106   void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4107 
4108   // Floating-point add strictly-ordered reduction, accumulating in scalar.
4109   void fadda(const VRegister& vd,
4110              const PRegister& pg,
4111              const VRegister& vn,
4112              const ZRegister& zm);
4113 
4114   // Floating-point add recursive reduction to scalar.
4115   void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4116 
4117   // Floating-point complex add with rotate (predicated).
4118   void fcadd(const ZRegister& zd,
4119              const PRegisterM& pg,
4120              const ZRegister& zn,
4121              const ZRegister& zm,
4122              int rot);
4123 
4124   // Floating-point compare vector with zero.
4125   void fcmeq(const PRegisterWithLaneSize& pd,
4126              const PRegisterZ& pg,
4127              const ZRegister& zn,
4128              double zero);
4129 
4130   // Floating-point compare vectors.
4131   void fcmeq(const PRegisterWithLaneSize& pd,
4132              const PRegisterZ& pg,
4133              const ZRegister& zn,
4134              const ZRegister& zm);
4135 
4136   // Floating-point compare vector with zero.
4137   void fcmge(const PRegisterWithLaneSize& pd,
4138              const PRegisterZ& pg,
4139              const ZRegister& zn,
4140              double zero);
4141 
4142   // Floating-point compare vectors.
4143   void fcmge(const PRegisterWithLaneSize& pd,
4144              const PRegisterZ& pg,
4145              const ZRegister& zn,
4146              const ZRegister& zm);
4147 
4148   // Floating-point compare vector with zero.
4149   void fcmgt(const PRegisterWithLaneSize& pd,
4150              const PRegisterZ& pg,
4151              const ZRegister& zn,
4152              double zero);
4153 
4154   // Floating-point compare vectors.
4155   void fcmgt(const PRegisterWithLaneSize& pd,
4156              const PRegisterZ& pg,
4157              const ZRegister& zn,
4158              const ZRegister& zm);
4159 
4160   // Floating-point complex multiply-add with rotate (predicated).
4161   void fcmla(const ZRegister& zda,
4162              const PRegisterM& pg,
4163              const ZRegister& zn,
4164              const ZRegister& zm,
4165              int rot);
4166 
4167   // Floating-point complex multiply-add by indexed values with rotate.
4168   void fcmla(const ZRegister& zda,
4169              const ZRegister& zn,
4170              const ZRegister& zm,
4171              int index,
4172              int rot);
4173 
4174   // Floating-point compare vector with zero.
4175   void fcmle(const PRegisterWithLaneSize& pd,
4176              const PRegisterZ& pg,
4177              const ZRegister& zn,
4178              double zero);
4179 
4180   // Floating-point compare vector with zero.
4181   void fcmlt(const PRegisterWithLaneSize& pd,
4182              const PRegisterZ& pg,
4183              const ZRegister& zn,
4184              double zero);
4185 
4186   // Floating-point compare vector with zero.
4187   void fcmne(const PRegisterWithLaneSize& pd,
4188              const PRegisterZ& pg,
4189              const ZRegister& zn,
4190              double zero);
4191 
4192   // Floating-point compare vectors.
4193   void fcmne(const PRegisterWithLaneSize& pd,
4194              const PRegisterZ& pg,
4195              const ZRegister& zn,
4196              const ZRegister& zm);
4197 
4198   // Floating-point compare vectors.
4199   void fcmuo(const PRegisterWithLaneSize& pd,
4200              const PRegisterZ& pg,
4201              const ZRegister& zn,
4202              const ZRegister& zm);
4203 
4204   // Copy floating-point immediate to vector elements (predicated).
4205   void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4206 
4207   // Copy half-precision floating-point immediate to vector elements
4208   // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4209   void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4210     fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4211   }
4212 
4213   // Floating-point convert precision (predicated).
4214   void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4215 
4216   // Floating-point convert to signed integer, rounding toward zero
4217   // (predicated).
4218   void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4219 
4220   // Floating-point convert to unsigned integer, rounding toward zero
4221   // (predicated).
4222   void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4223 
4224   // Floating-point divide by vector (predicated).
4225   void fdiv(const ZRegister& zd,
4226             const PRegisterM& pg,
4227             const ZRegister& zn,
4228             const ZRegister& zm);
4229 
4230   // Floating-point reversed divide by vector (predicated).
4231   void fdivr(const ZRegister& zd,
4232              const PRegisterM& pg,
4233              const ZRegister& zn,
4234              const ZRegister& zm);
4235 
4236   // Broadcast floating-point immediate to vector elements.
4237   void fdup(const ZRegister& zd, double imm);
4238 
4239   // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4240   void fdup(const ZRegister& zd, Float16 imm) {
4241     fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4242   }
4243 
4244   // Floating-point exponential accelerator.
4245   void fexpa(const ZRegister& zd, const ZRegister& zn);
4246 
4247   // Floating-point fused multiply-add vectors (predicated), writing
4248   // multiplicand [Zdn = Za + Zdn * Zm].
4249   void fmad(const ZRegister& zdn,
4250             const PRegisterM& pg,
4251             const ZRegister& zm,
4252             const ZRegister& za);
4253 
4254   // Floating-point maximum with immediate (predicated).
4255   void fmax(const ZRegister& zd,
4256             const PRegisterM& pg,
4257             const ZRegister& zn,
4258             double imm);
4259 
4260   // Floating-point maximum (predicated).
4261   void fmax(const ZRegister& zd,
4262             const PRegisterM& pg,
4263             const ZRegister& zn,
4264             const ZRegister& zm);
4265 
4266   // Floating-point maximum number with immediate (predicated).
4267   void fmaxnm(const ZRegister& zd,
4268               const PRegisterM& pg,
4269               const ZRegister& zn,
4270               double imm);
4271 
4272   // Floating-point maximum number (predicated).
4273   void fmaxnm(const ZRegister& zd,
4274               const PRegisterM& pg,
4275               const ZRegister& zn,
4276               const ZRegister& zm);
4277 
4278   // Floating-point maximum number recursive reduction to scalar.
4279   void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4280 
4281   // Floating-point maximum recursive reduction to scalar.
4282   void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4283 
4284   // Floating-point minimum with immediate (predicated).
4285   void fmin(const ZRegister& zd,
4286             const PRegisterM& pg,
4287             const ZRegister& zn,
4288             double imm);
4289 
4290   // Floating-point minimum (predicated).
4291   void fmin(const ZRegister& zd,
4292             const PRegisterM& pg,
4293             const ZRegister& zn,
4294             const ZRegister& zm);
4295 
4296   // Floating-point minimum number with immediate (predicated).
4297   void fminnm(const ZRegister& zd,
4298               const PRegisterM& pg,
4299               const ZRegister& zn,
4300               double imm);
4301 
4302   // Floating-point minimum number (predicated).
4303   void fminnm(const ZRegister& zd,
4304               const PRegisterM& pg,
4305               const ZRegister& zn,
4306               const ZRegister& zm);
4307 
4308   // Floating-point minimum number recursive reduction to scalar.
4309   void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4310 
4311   // Floating-point minimum recursive reduction to scalar.
4312   void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4313 
4314   // Floating-point fused multiply-add vectors (predicated), writing addend
4315   // [Zda = Zda + Zn * Zm].
4316   void fmla(const ZRegister& zda,
4317             const PRegisterM& pg,
4318             const ZRegister& zn,
4319             const ZRegister& zm);
4320 
4321   // Floating-point fused multiply-add by indexed elements
4322   // (Zda = Zda + Zn * Zm[indexed]).
4323   void fmla(const ZRegister& zda,
4324             const ZRegister& zn,
4325             const ZRegister& zm,
4326             int index);
4327 
4328   // Floating-point fused multiply-subtract vectors (predicated), writing
4329   // addend [Zda = Zda + -Zn * Zm].
4330   void fmls(const ZRegister& zda,
4331             const PRegisterM& pg,
4332             const ZRegister& zn,
4333             const ZRegister& zm);
4334 
4335   // Floating-point fused multiply-subtract by indexed elements
4336   // (Zda = Zda + -Zn * Zm[indexed]).
4337   void fmls(const ZRegister& zda,
4338             const ZRegister& zn,
4339             const ZRegister& zm,
4340             int index);
4341 
4342   // Move 8-bit floating-point immediate to vector elements (unpredicated).
4343   void fmov(const ZRegister& zd, double imm);
4344 
4345   // Move 8-bit floating-point immediate to vector elements (predicated).
4346   void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4347 
4348   // Floating-point fused multiply-subtract vectors (predicated), writing
4349   // multiplicand [Zdn = Za + -Zdn * Zm].
4350   void fmsb(const ZRegister& zdn,
4351             const PRegisterM& pg,
4352             const ZRegister& zm,
4353             const ZRegister& za);
4354 
4355   // Floating-point multiply by immediate (predicated).
4356   void fmul(const ZRegister& zd,
4357             const PRegisterM& pg,
4358             const ZRegister& zn,
4359             double imm);
4360 
4361   // Floating-point multiply vectors (predicated).
4362   void fmul(const ZRegister& zd,
4363             const PRegisterM& pg,
4364             const ZRegister& zn,
4365             const ZRegister& zm);
4366 
4367   // Floating-point multiply by indexed elements.
4368   void fmul(const ZRegister& zd,
4369             const ZRegister& zn,
4370             const ZRegister& zm,
4371             unsigned index);
4372 
4373   // Floating-point multiply vectors (unpredicated).
4374   void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4375 
4376   // Floating-point multiply-extended vectors (predicated).
4377   void fmulx(const ZRegister& zd,
4378              const PRegisterM& pg,
4379              const ZRegister& zn,
4380              const ZRegister& zm);
4381 
4382   // Floating-point negate (predicated).
4383   void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4384 
4385   // Floating-point negated fused multiply-add vectors (predicated), writing
4386   // multiplicand [Zdn = -Za + -Zdn * Zm].
4387   void fnmad(const ZRegister& zdn,
4388              const PRegisterM& pg,
4389              const ZRegister& zm,
4390              const ZRegister& za);
4391 
4392   // Floating-point negated fused multiply-add vectors (predicated), writing
4393   // addend [Zda = -Zda + -Zn * Zm].
4394   void fnmla(const ZRegister& zda,
4395              const PRegisterM& pg,
4396              const ZRegister& zn,
4397              const ZRegister& zm);
4398 
4399   // Floating-point negated fused multiply-subtract vectors (predicated),
4400   // writing addend [Zda = -Zda + Zn * Zm].
4401   void fnmls(const ZRegister& zda,
4402              const PRegisterM& pg,
4403              const ZRegister& zn,
4404              const ZRegister& zm);
4405 
4406   // Floating-point negated fused multiply-subtract vectors (predicated),
4407   // writing multiplicand [Zdn = -Za + Zdn * Zm].
4408   void fnmsb(const ZRegister& zdn,
4409              const PRegisterM& pg,
4410              const ZRegister& zm,
4411              const ZRegister& za);
4412 
4413   // Floating-point reciprocal estimate (unpredicated).
4414   void frecpe(const ZRegister& zd, const ZRegister& zn);
4415 
4416   // Floating-point reciprocal step (unpredicated).
4417   void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4418 
4419   // Floating-point reciprocal exponent (predicated).
4420   void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4421 
4422   // Floating-point round to integral value, ties away from zero (predicated).
4423   void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4424 
4425   // Floating-point round to integral, current rounding mode (predicated).
4426   void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4427 
4428   // Floating-point round to integral value, toward minus infinity (predicated).
4429   void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4430 
4431   // Floating-point round to integral value, ties to even (predicated).
4432   void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4433 
4434   // Floating-point round to integral value, toward plus infinity (predicated).
4435   void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4436 
4437   // Floating-point round to integral exact, current rounding mode (predicated).
4438   void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4439 
4440   // Floating-point round to integral value, toward zero (predicated).
4441   void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4442 
4443   // Floating-point reciprocal square root estimate (unpredicated).
4444   void frsqrte(const ZRegister& zd, const ZRegister& zn);
4445 
4446   // Floating-point reciprocal square root step (unpredicated).
4447   void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4448 
4449   // Floating-point adjust exponent by vector (predicated).
4450   void fscale(const ZRegister& zd,
4451               const PRegisterM& pg,
4452               const ZRegister& zn,
4453               const ZRegister& zm);
4454 
4455   // Floating-point square root (predicated).
4456   void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4457 
4458   // Floating-point subtract immediate (predicated).
4459   void fsub(const ZRegister& zd,
4460             const PRegisterM& pg,
4461             const ZRegister& zn,
4462             double imm);
4463 
4464   // Floating-point subtract vectors (predicated).
4465   void fsub(const ZRegister& zd,
4466             const PRegisterM& pg,
4467             const ZRegister& zn,
4468             const ZRegister& zm);
4469 
4470   // Floating-point subtract vectors (unpredicated).
4471   void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4472 
4473   // Floating-point reversed subtract from immediate (predicated).
4474   void fsubr(const ZRegister& zd,
4475              const PRegisterM& pg,
4476              const ZRegister& zn,
4477              double imm);
4478 
4479   // Floating-point reversed subtract vectors (predicated).
4480   void fsubr(const ZRegister& zd,
4481              const PRegisterM& pg,
4482              const ZRegister& zn,
4483              const ZRegister& zm);
4484 
4485   // Floating-point trigonometric multiply-add coefficient.
4486   void ftmad(const ZRegister& zd,
4487              const ZRegister& zn,
4488              const ZRegister& zm,
4489              int imm3);
4490 
4491   // Floating-point trigonometric starting value.
4492   void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4493 
4494   // Floating-point trigonometric select coefficient.
4495   void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4496 
4497   // Increment scalar by multiple of predicate constraint element count.
4498   void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4499 
4500   // Increment scalar by multiple of predicate constraint element count.
4501   void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4502 
4503   // Increment vector by multiple of predicate constraint element count.
4504   void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4505 
4506   // Increment scalar by multiple of predicate constraint element count.
4507   void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4508 
4509   // Increment vector by multiple of predicate constraint element count.
4510   void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4511 
4512   // Increment scalar by active predicate element count.
4513   void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4514 
4515   // Increment vector by active predicate element count.
4516   void incp(const ZRegister& zdn, const PRegister& pg);
4517 
4518   // Increment scalar by multiple of predicate constraint element count.
4519   void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4520 
4521   // Increment vector by multiple of predicate constraint element count.
4522   void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4523 
4524   // Create index starting from and incremented by immediate.
4525   void index(const ZRegister& zd, int start, int step);
4526 
4527   // Create index starting from and incremented by general-purpose register.
4528   void index(const ZRegister& zd, const Register& rn, const Register& rm);
4529 
4530   // Create index starting from general-purpose register and incremented by
4531   // immediate.
4532   void index(const ZRegister& zd, const Register& rn, int imm5);
4533 
4534   // Create index starting from immediate and incremented by general-purpose
4535   // register.
4536   void index(const ZRegister& zd, int imm5, const Register& rm);
4537 
4538   // Insert general-purpose register in shifted vector.
4539   void insr(const ZRegister& zdn, const Register& rm);
4540 
4541   // Insert SIMD&FP scalar register in shifted vector.
4542   void insr(const ZRegister& zdn, const VRegister& vm);
4543 
4544   // Extract element after last to general-purpose register.
4545   void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4546 
4547   // Extract element after last to SIMD&FP scalar register.
4548   void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4549 
4550   // Extract last element to general-purpose register.
4551   void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4552 
4553   // Extract last element to SIMD&FP scalar register.
4554   void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4555 
4556   // Contiguous/gather load bytes to vector.
4557   void ld1b(const ZRegister& zt,
4558             const PRegisterZ& pg,
4559             const SVEMemOperand& addr);
4560 
4561   // Contiguous/gather load halfwords to vector.
4562   void ld1h(const ZRegister& zt,
4563             const PRegisterZ& pg,
4564             const SVEMemOperand& addr);
4565 
4566   // Contiguous/gather load words to vector.
4567   void ld1w(const ZRegister& zt,
4568             const PRegisterZ& pg,
4569             const SVEMemOperand& addr);
4570 
4571   // Contiguous/gather load doublewords to vector.
4572   void ld1d(const ZRegister& zt,
4573             const PRegisterZ& pg,
4574             const SVEMemOperand& addr);
4575 
4576   // TODO: Merge other loads into the SVEMemOperand versions.
4577 
4578   // Load and broadcast unsigned byte to vector.
4579   void ld1rb(const ZRegister& zt,
4580              const PRegisterZ& pg,
4581              const SVEMemOperand& addr);
4582 
4583   // Load and broadcast unsigned halfword to vector.
4584   void ld1rh(const ZRegister& zt,
4585              const PRegisterZ& pg,
4586              const SVEMemOperand& addr);
4587 
4588   // Load and broadcast unsigned word to vector.
4589   void ld1rw(const ZRegister& zt,
4590              const PRegisterZ& pg,
4591              const SVEMemOperand& addr);
4592 
4593   // Load and broadcast doubleword to vector.
4594   void ld1rd(const ZRegister& zt,
4595              const PRegisterZ& pg,
4596              const SVEMemOperand& addr);
4597 
4598   // Contiguous load and replicate sixteen bytes.
4599   void ld1rqb(const ZRegister& zt,
4600               const PRegisterZ& pg,
4601               const SVEMemOperand& addr);
4602 
4603   // Contiguous load and replicate eight halfwords.
4604   void ld1rqh(const ZRegister& zt,
4605               const PRegisterZ& pg,
4606               const SVEMemOperand& addr);
4607 
4608   // Contiguous load and replicate four words.
4609   void ld1rqw(const ZRegister& zt,
4610               const PRegisterZ& pg,
4611               const SVEMemOperand& addr);
4612 
4613   // Contiguous load and replicate two doublewords.
4614   void ld1rqd(const ZRegister& zt,
4615               const PRegisterZ& pg,
4616               const SVEMemOperand& addr);
4617 
4618   // Contiguous load and replicate thirty-two bytes.
4619   void ld1rob(const ZRegister& zt,
4620               const PRegisterZ& pg,
4621               const SVEMemOperand& addr);
4622 
4623   // Contiguous load and replicate sixteen halfwords.
4624   void ld1roh(const ZRegister& zt,
4625               const PRegisterZ& pg,
4626               const SVEMemOperand& addr);
4627 
4628   // Contiguous load and replicate eight words.
4629   void ld1row(const ZRegister& zt,
4630               const PRegisterZ& pg,
4631               const SVEMemOperand& addr);
4632 
4633   // Contiguous load and replicate four doublewords.
4634   void ld1rod(const ZRegister& zt,
4635               const PRegisterZ& pg,
4636               const SVEMemOperand& addr);
4637 
4638   // Load and broadcast signed byte to vector.
4639   void ld1rsb(const ZRegister& zt,
4640               const PRegisterZ& pg,
4641               const SVEMemOperand& addr);
4642 
4643   // Load and broadcast signed halfword to vector.
4644   void ld1rsh(const ZRegister& zt,
4645               const PRegisterZ& pg,
4646               const SVEMemOperand& addr);
4647 
4648   // Load and broadcast signed word to vector.
4649   void ld1rsw(const ZRegister& zt,
4650               const PRegisterZ& pg,
4651               const SVEMemOperand& addr);
4652 
4653   // Contiguous/gather load signed bytes to vector.
4654   void ld1sb(const ZRegister& zt,
4655              const PRegisterZ& pg,
4656              const SVEMemOperand& addr);
4657 
4658   // Contiguous/gather load signed halfwords to vector.
4659   void ld1sh(const ZRegister& zt,
4660              const PRegisterZ& pg,
4661              const SVEMemOperand& addr);
4662 
4663   // Contiguous/gather load signed words to vector.
4664   void ld1sw(const ZRegister& zt,
4665              const PRegisterZ& pg,
4666              const SVEMemOperand& addr);
4667 
4668   // TODO: Merge other loads into the SVEMemOperand versions.
4669 
4670   // Contiguous load two-byte structures to two vectors.
4671   void ld2b(const ZRegister& zt1,
4672             const ZRegister& zt2,
4673             const PRegisterZ& pg,
4674             const SVEMemOperand& addr);
4675 
4676   // Contiguous load two-halfword structures to two vectors.
4677   void ld2h(const ZRegister& zt1,
4678             const ZRegister& zt2,
4679             const PRegisterZ& pg,
4680             const SVEMemOperand& addr);
4681 
4682   // Contiguous load two-word structures to two vectors.
4683   void ld2w(const ZRegister& zt1,
4684             const ZRegister& zt2,
4685             const PRegisterZ& pg,
4686             const SVEMemOperand& addr);
4687 
4688   // Contiguous load two-doubleword structures to two vectors.
4689   void ld2d(const ZRegister& zt1,
4690             const ZRegister& zt2,
4691             const PRegisterZ& pg,
4692             const SVEMemOperand& addr);
4693 
4694   // Contiguous load three-byte structures to three vectors.
4695   void ld3b(const ZRegister& zt1,
4696             const ZRegister& zt2,
4697             const ZRegister& zt3,
4698             const PRegisterZ& pg,
4699             const SVEMemOperand& addr);
4700 
4701   // Contiguous load three-halfword structures to three vectors.
4702   void ld3h(const ZRegister& zt1,
4703             const ZRegister& zt2,
4704             const ZRegister& zt3,
4705             const PRegisterZ& pg,
4706             const SVEMemOperand& addr);
4707 
4708   // Contiguous load three-word structures to three vectors.
4709   void ld3w(const ZRegister& zt1,
4710             const ZRegister& zt2,
4711             const ZRegister& zt3,
4712             const PRegisterZ& pg,
4713             const SVEMemOperand& addr);
4714 
4715   // Contiguous load three-doubleword structures to three vectors.
4716   void ld3d(const ZRegister& zt1,
4717             const ZRegister& zt2,
4718             const ZRegister& zt3,
4719             const PRegisterZ& pg,
4720             const SVEMemOperand& addr);
4721 
4722   // Contiguous load four-byte structures to four vectors.
4723   void ld4b(const ZRegister& zt1,
4724             const ZRegister& zt2,
4725             const ZRegister& zt3,
4726             const ZRegister& zt4,
4727             const PRegisterZ& pg,
4728             const SVEMemOperand& addr);
4729 
4730   // Contiguous load four-halfword structures to four vectors.
4731   void ld4h(const ZRegister& zt1,
4732             const ZRegister& zt2,
4733             const ZRegister& zt3,
4734             const ZRegister& zt4,
4735             const PRegisterZ& pg,
4736             const SVEMemOperand& addr);
4737 
4738   // Contiguous load four-word structures to four vectors.
4739   void ld4w(const ZRegister& zt1,
4740             const ZRegister& zt2,
4741             const ZRegister& zt3,
4742             const ZRegister& zt4,
4743             const PRegisterZ& pg,
4744             const SVEMemOperand& addr);
4745 
4746   // Contiguous load four-doubleword structures to four vectors.
4747   void ld4d(const ZRegister& zt1,
4748             const ZRegister& zt2,
4749             const ZRegister& zt3,
4750             const ZRegister& zt4,
4751             const PRegisterZ& pg,
4752             const SVEMemOperand& addr);
4753 
4754   // Contiguous load first-fault unsigned bytes to vector.
4755   void ldff1b(const ZRegister& zt,
4756               const PRegisterZ& pg,
4757               const SVEMemOperand& addr);
4758 
4759   // Contiguous load first-fault unsigned halfwords to vector.
4760   void ldff1h(const ZRegister& zt,
4761               const PRegisterZ& pg,
4762               const SVEMemOperand& addr);
4763 
4764   // Contiguous load first-fault unsigned words to vector.
4765   void ldff1w(const ZRegister& zt,
4766               const PRegisterZ& pg,
4767               const SVEMemOperand& addr);
4768 
4769   // Contiguous load first-fault doublewords to vector.
4770   void ldff1d(const ZRegister& zt,
4771               const PRegisterZ& pg,
4772               const SVEMemOperand& addr);
4773 
4774   // Contiguous load first-fault signed bytes to vector.
4775   void ldff1sb(const ZRegister& zt,
4776                const PRegisterZ& pg,
4777                const SVEMemOperand& addr);
4778 
4779   // Contiguous load first-fault signed halfwords to vector.
4780   void ldff1sh(const ZRegister& zt,
4781                const PRegisterZ& pg,
4782                const SVEMemOperand& addr);
4783 
4784   // Contiguous load first-fault signed words to vector.
4785   void ldff1sw(const ZRegister& zt,
4786                const PRegisterZ& pg,
4787                const SVEMemOperand& addr);
4788 
  // Gather load first-fault unsigned bytes to vector (vector index).
  void ldff1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const Register& xn,
              const ZRegister& zm);
4794 
4795   // Gather load first-fault unsigned bytes to vector (immediate index).
4796   void ldff1b(const ZRegister& zt,
4797               const PRegisterZ& pg,
4798               const ZRegister& zn,
4799               int imm5);
4800 
4801   // Gather load first-fault doublewords to vector (vector index).
4802   void ldff1d(const ZRegister& zt,
4803               const PRegisterZ& pg,
4804               const Register& xn,
4805               const ZRegister& zm);
4806 
4807   // Gather load first-fault doublewords to vector (immediate index).
4808   void ldff1d(const ZRegister& zt,
4809               const PRegisterZ& pg,
4810               const ZRegister& zn,
4811               int imm5);
4812 
4813   // Gather load first-fault unsigned halfwords to vector (vector index).
4814   void ldff1h(const ZRegister& zt,
4815               const PRegisterZ& pg,
4816               const Register& xn,
4817               const ZRegister& zm);
4818 
4819   // Gather load first-fault unsigned halfwords to vector (immediate index).
4820   void ldff1h(const ZRegister& zt,
4821               const PRegisterZ& pg,
4822               const ZRegister& zn,
4823               int imm5);
4824 
4825   // Gather load first-fault signed bytes to vector (vector index).
4826   void ldff1sb(const ZRegister& zt,
4827                const PRegisterZ& pg,
4828                const Register& xn,
4829                const ZRegister& zm);
4830 
4831   // Gather load first-fault signed bytes to vector (immediate index).
4832   void ldff1sb(const ZRegister& zt,
4833                const PRegisterZ& pg,
4834                const ZRegister& zn,
4835                int imm5);
4836 
4837   // Gather load first-fault signed halfwords to vector (vector index).
4838   void ldff1sh(const ZRegister& zt,
4839                const PRegisterZ& pg,
4840                const Register& xn,
4841                const ZRegister& zm);
4842 
4843   // Gather load first-fault signed halfwords to vector (immediate index).
4844   void ldff1sh(const ZRegister& zt,
4845                const PRegisterZ& pg,
4846                const ZRegister& zn,
4847                int imm5);
4848 
4849   // Gather load first-fault signed words to vector (vector index).
4850   void ldff1sw(const ZRegister& zt,
4851                const PRegisterZ& pg,
4852                const Register& xn,
4853                const ZRegister& zm);
4854 
4855   // Gather load first-fault signed words to vector (immediate index).
4856   void ldff1sw(const ZRegister& zt,
4857                const PRegisterZ& pg,
4858                const ZRegister& zn,
4859                int imm5);
4860 
4861   // Gather load first-fault unsigned words to vector (vector index).
4862   void ldff1w(const ZRegister& zt,
4863               const PRegisterZ& pg,
4864               const Register& xn,
4865               const ZRegister& zm);
4866 
4867   // Gather load first-fault unsigned words to vector (immediate index).
4868   void ldff1w(const ZRegister& zt,
4869               const PRegisterZ& pg,
4870               const ZRegister& zn,
4871               int imm5);
4872 
4873   // Contiguous load non-fault unsigned bytes to vector (immediate index).
4874   void ldnf1b(const ZRegister& zt,
4875               const PRegisterZ& pg,
4876               const SVEMemOperand& addr);
4877 
4878   // Contiguous load non-fault doublewords to vector (immediate index).
4879   void ldnf1d(const ZRegister& zt,
4880               const PRegisterZ& pg,
4881               const SVEMemOperand& addr);
4882 
4883   // Contiguous load non-fault unsigned halfwords to vector (immediate
4884   // index).
4885   void ldnf1h(const ZRegister& zt,
4886               const PRegisterZ& pg,
4887               const SVEMemOperand& addr);
4888 
4889   // Contiguous load non-fault signed bytes to vector (immediate index).
4890   void ldnf1sb(const ZRegister& zt,
4891                const PRegisterZ& pg,
4892                const SVEMemOperand& addr);
4893 
4894   // Contiguous load non-fault signed halfwords to vector (immediate index).
4895   void ldnf1sh(const ZRegister& zt,
4896                const PRegisterZ& pg,
4897                const SVEMemOperand& addr);
4898 
4899   // Contiguous load non-fault signed words to vector (immediate index).
4900   void ldnf1sw(const ZRegister& zt,
4901                const PRegisterZ& pg,
4902                const SVEMemOperand& addr);
4903 
4904   // Contiguous load non-fault unsigned words to vector (immediate index).
4905   void ldnf1w(const ZRegister& zt,
4906               const PRegisterZ& pg,
4907               const SVEMemOperand& addr);
4908 
4909   // Contiguous load non-temporal bytes to vector.
4910   void ldnt1b(const ZRegister& zt,
4911               const PRegisterZ& pg,
4912               const SVEMemOperand& addr);
4913 
4914   // Contiguous load non-temporal halfwords to vector.
4915   void ldnt1h(const ZRegister& zt,
4916               const PRegisterZ& pg,
4917               const SVEMemOperand& addr);
4918 
4919   // Contiguous load non-temporal words to vector.
4920   void ldnt1w(const ZRegister& zt,
4921               const PRegisterZ& pg,
4922               const SVEMemOperand& addr);
4923 
4924   // Contiguous load non-temporal doublewords to vector.
4925   void ldnt1d(const ZRegister& zt,
4926               const PRegisterZ& pg,
4927               const SVEMemOperand& addr);
4928 
4929   // Load SVE predicate/vector register.
4930   void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4931 
4932   // Logical shift left by immediate (predicated).
4933   void lsl(const ZRegister& zd,
4934            const PRegisterM& pg,
4935            const ZRegister& zn,
4936            int shift);
4937 
4938   // Logical shift left by 64-bit wide elements (predicated).
4939   void lsl(const ZRegister& zd,
4940            const PRegisterM& pg,
4941            const ZRegister& zn,
4942            const ZRegister& zm);
4943 
4944   // Logical shift left by immediate (unpredicated).
4945   void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4946 
4947   // Logical shift left by 64-bit wide elements (unpredicated).
4948   void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4949 
4950   // Reversed logical shift left by vector (predicated).
4951   void lslr(const ZRegister& zd,
4952             const PRegisterM& pg,
4953             const ZRegister& zn,
4954             const ZRegister& zm);
4955 
4956   // Logical shift right by immediate (predicated).
4957   void lsr(const ZRegister& zd,
4958            const PRegisterM& pg,
4959            const ZRegister& zn,
4960            int shift);
4961 
4962   // Logical shift right by 64-bit wide elements (predicated).
4963   void lsr(const ZRegister& zd,
4964            const PRegisterM& pg,
4965            const ZRegister& zn,
4966            const ZRegister& zm);
4967 
4968   // Logical shift right by immediate (unpredicated).
4969   void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4970 
4971   // Logical shift right by 64-bit wide elements (unpredicated).
4972   void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4973 
4974   // Reversed logical shift right by vector (predicated).
4975   void lsrr(const ZRegister& zd,
4976             const PRegisterM& pg,
4977             const ZRegister& zn,
4978             const ZRegister& zm);
4979 
4980   // Bitwise invert predicate.
4981   void not_(const PRegisterWithLaneSize& pd,
4982             const PRegisterZ& pg,
4983             const PRegisterWithLaneSize& pn);
4984 
4985   // Bitwise invert predicate, setting the condition flags.
4986   void nots(const PRegisterWithLaneSize& pd,
4987             const PRegisterZ& pg,
4988             const PRegisterWithLaneSize& pn);
4989 
4990   // Multiply-add vectors (predicated), writing multiplicand
4991   // [Zdn = Za + Zdn * Zm].
4992   void mad(const ZRegister& zdn,
4993            const PRegisterM& pg,
4994            const ZRegister& zm,
4995            const ZRegister& za);
4996 
4997   // Multiply-add vectors (predicated), writing addend
4998   // [Zda = Zda + Zn * Zm].
4999   void mla(const ZRegister& zda,
5000            const PRegisterM& pg,
5001            const ZRegister& zn,
5002            const ZRegister& zm);
5003 
5004   // Multiply-subtract vectors (predicated), writing addend
5005   // [Zda = Zda - Zn * Zm].
5006   void mls(const ZRegister& zda,
5007            const PRegisterM& pg,
5008            const ZRegister& zn,
5009            const ZRegister& zm);
5010 
  // Move predicates (unpredicated).
  void mov(const PRegister& pd, const PRegister& pn);
5013 
  // Move predicates (merging).
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterM& pg,
           const PRegisterWithLaneSize& pn);
5018 
  // Move predicates (zeroing).
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn);
5023 
  // Move general-purpose register to vector elements (unpredicated).
  void mov(const ZRegister& zd, const Register& xn);
5026 
  // Move SIMD&FP scalar register to vector elements (unpredicated).
  void mov(const ZRegister& zd, const VRegister& vn);
5029 
  // Move vector register (unpredicated).
  void mov(const ZRegister& zd, const ZRegister& zn);
5032 
  // Move indexed element to vector elements (unpredicated).
  void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5035 
  // Move general-purpose register to vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5038 
  // Move SIMD&FP scalar register to vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5041 
  // Move vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5044 
  // Move signed integer immediate to vector elements (predicated).
  // NOTE(review): `shift` defaults to -1; presumably a negative value asks the
  // assembler to derive the shift from `imm8` - confirm against the definition.
  void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5047 
5048   // Move signed immediate to vector elements (unpredicated).
5049   void mov(const ZRegister& zd, int imm8, int shift);
5050 
5051   // Move logical bitmask immediate to vector (unpredicated).
5052   void mov(const ZRegister& zd, uint64_t imm);
5053 
  // Move predicate (unpredicated), setting the condition flags.
  void movs(const PRegister& pd, const PRegister& pn);
5056 
  // Move predicates (zeroing), setting the condition flags.
  void movs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);
5061 
5062   // Move prefix (predicated).
5063   void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5064 
5065   // Move prefix (unpredicated).
5066   void movprfx(const ZRegister& zd, const ZRegister& zn);
5067 
5068   // Multiply-subtract vectors (predicated), writing multiplicand
5069   // [Zdn = Za - Zdn * Zm].
5070   void msb(const ZRegister& zdn,
5071            const PRegisterM& pg,
5072            const ZRegister& zm,
5073            const ZRegister& za);
5074 
5075   // Multiply vectors (predicated).
5076   void mul(const ZRegister& zd,
5077            const PRegisterM& pg,
5078            const ZRegister& zn,
5079            const ZRegister& zm);
5080 
5081   // Multiply by immediate (unpredicated).
5082   void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5083 
5084   // Bitwise NAND predicates.
5085   void nand(const PRegisterWithLaneSize& pd,
5086             const PRegisterZ& pg,
5087             const PRegisterWithLaneSize& pn,
5088             const PRegisterWithLaneSize& pm);
5089 
  // Bitwise NAND predicates, setting the condition flags.
  void nands(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);
5095 
5096   // Negate (predicated).
5097   void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5098 
5099   // Bitwise NOR predicates.
5100   void nor(const PRegisterWithLaneSize& pd,
5101            const PRegisterZ& pg,
5102            const PRegisterWithLaneSize& pn,
5103            const PRegisterWithLaneSize& pm);
5104 
  // Bitwise NOR predicates, setting the condition flags.
  void nors(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);
5110 
5111   // Bitwise invert vector (predicated).
5112   void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5113 
5114   // Bitwise OR inverted predicate.
5115   void orn(const PRegisterWithLaneSize& pd,
5116            const PRegisterZ& pg,
5117            const PRegisterWithLaneSize& pn,
5118            const PRegisterWithLaneSize& pm);
5119 
  // Bitwise OR inverted predicate, setting the condition flags.
  void orns(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);
5125 
5126   // Bitwise OR with inverted immediate (unpredicated).
5127   void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5128 
5129   // Bitwise OR predicate.
5130   void orr(const PRegisterWithLaneSize& pd,
5131            const PRegisterZ& pg,
5132            const PRegisterWithLaneSize& pn,
5133            const PRegisterWithLaneSize& pm);
5134 
5135   // Bitwise OR vectors (predicated).
5136   void orr(const ZRegister& zd,
5137            const PRegisterM& pg,
5138            const ZRegister& zn,
5139            const ZRegister& zm);
5140 
5141   // Bitwise OR with immediate (unpredicated).
5142   void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5143 
5144   // Bitwise OR vectors (unpredicated).
5145   void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5146 
  // Bitwise OR predicate, setting the condition flags.
  void orrs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);
5152 
5153   // Bitwise OR reduction to scalar.
5154   void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5155 
5156   // Set all predicate elements to false.
5157   void pfalse(const PRegisterWithLaneSize& pd);
5158 
5159   // Set the first active predicate element to true.
5160   void pfirst(const PRegisterWithLaneSize& pd,
5161               const PRegister& pg,
5162               const PRegisterWithLaneSize& pn);
5163 
5164   // Find next active predicate.
5165   void pnext(const PRegisterWithLaneSize& pd,
5166              const PRegister& pg,
5167              const PRegisterWithLaneSize& pn);
5168 
5169   // Prefetch bytes.
5170   void prfb(PrefetchOperation prfop,
5171             const PRegister& pg,
5172             const SVEMemOperand& addr);
5173 
5174   // Prefetch halfwords.
5175   void prfh(PrefetchOperation prfop,
5176             const PRegister& pg,
5177             const SVEMemOperand& addr);
5178 
5179   // Prefetch words.
5180   void prfw(PrefetchOperation prfop,
5181             const PRegister& pg,
5182             const SVEMemOperand& addr);
5183 
5184   // Prefetch doublewords.
5185   void prfd(PrefetchOperation prfop,
5186             const PRegister& pg,
5187             const SVEMemOperand& addr);
5188 
5189   // Set condition flags for predicate.
5190   void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5191 
5192   // Initialise predicate from named constraint.
5193   void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5194 
  // Initialise predicate from named constraint, setting the condition flags.
  void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5197 
  // Unpack and widen the high half of predicate.
  void punpkhi(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);
5201 
  // Unpack and widen the low half of predicate.
  void punpklo(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);
5205 
5206   // Reverse bits (predicated).
5207   void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5208 
5209   // Read the first-fault register.
5210   void rdffr(const PRegisterWithLaneSize& pd);
5211 
  // Return predicate of successfully loaded elements.
  void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5214 
  // Return predicate of successfully loaded elements, setting the condition
  // flags.
  void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5217 
5218   // Read multiple of vector register size to scalar register.
5219   void rdvl(const Register& xd, int imm6);
5220 
5221   // Reverse all elements in a predicate.
5222   void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5223 
5224   // Reverse all elements in a vector (unpredicated).
5225   void rev(const ZRegister& zd, const ZRegister& zn);
5226 
  // Reverse bytes within elements (predicated).
  void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5229 
  // Reverse halfwords within elements (predicated).
  void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5232 
  // Reverse words within elements (predicated).
  void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5235 
5236   // Signed absolute difference (predicated).
5237   void sabd(const ZRegister& zd,
5238             const PRegisterM& pg,
5239             const ZRegister& zn,
5240             const ZRegister& zm);
5241 
5242   // Signed add reduction to scalar.
5243   void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5244 
5245   // Signed integer convert to floating-point (predicated).
5246   void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5247 
5248   // Signed divide (predicated).
5249   void sdiv(const ZRegister& zd,
5250             const PRegisterM& pg,
5251             const ZRegister& zn,
5252             const ZRegister& zm);
5253 
5254   // Signed reversed divide (predicated).
5255   void sdivr(const ZRegister& zd,
5256              const PRegisterM& pg,
5257              const ZRegister& zn,
5258              const ZRegister& zm);
5259 
5260   // Signed dot product by indexed quadtuplet.
5261   void sdot(const ZRegister& zda,
5262             const ZRegister& zn,
5263             const ZRegister& zm,
5264             int index);
5265 
5266   // Signed dot product.
5267   void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5268 
5269   // Conditionally select elements from two predicates.
5270   void sel(const PRegisterWithLaneSize& pd,
5271            const PRegister& pg,
5272            const PRegisterWithLaneSize& pn,
5273            const PRegisterWithLaneSize& pm);
5274 
5275   // Conditionally select elements from two vectors.
5276   void sel(const ZRegister& zd,
5277            const PRegister& pg,
5278            const ZRegister& zn,
5279            const ZRegister& zm);
5280 
5281   // Initialise the first-fault register to all true.
5282   void setffr();
5283 
5284   // Signed maximum vectors (predicated).
5285   void smax(const ZRegister& zd,
5286             const PRegisterM& pg,
5287             const ZRegister& zn,
5288             const ZRegister& zm);
5289 
5290   // Signed maximum with immediate (unpredicated).
5291   void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5292 
5293   // Signed maximum reduction to scalar.
5294   void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5295 
5296   // Signed minimum vectors (predicated).
5297   void smin(const ZRegister& zd,
5298             const PRegisterM& pg,
5299             const ZRegister& zn,
5300             const ZRegister& zm);
5301 
5302   // Signed minimum with immediate (unpredicated).
5303   void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5304 
5305   // Signed minimum reduction to scalar.
5306   void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5307 
5308   // Signed multiply returning high half (predicated).
5309   void smulh(const ZRegister& zd,
5310              const PRegisterM& pg,
5311              const ZRegister& zn,
5312              const ZRegister& zm);
5313 
5314   // Splice two vectors under predicate control.
5315   void splice(const ZRegister& zd,
5316               const PRegister& pg,
5317               const ZRegister& zn,
5318               const ZRegister& zm);
5319 
5320   // Splice two vectors under predicate control (constructive).
5321   void splice_con(const ZRegister& zd,
5322                   const PRegister& pg,
5323                   const ZRegister& zn,
5324                   const ZRegister& zm);
5325 
5326   // Signed saturating add vectors (unpredicated).
5327   void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5328 
  // Signed saturating add immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1; presumably a negative value asks the
  // assembler to derive the shift from `imm8` - confirm against the definition.
  void sqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5334 
5335   // Signed saturating decrement scalar by multiple of 8-bit predicate
5336   // constraint element count.
5337   void sqdecb(const Register& xd,
5338               const Register& wn,
5339               int pattern,
5340               int multiplier);
5341 
5342   // Signed saturating decrement scalar by multiple of 8-bit predicate
5343   // constraint element count.
5344   void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5345 
5346   // Signed saturating decrement scalar by multiple of 64-bit predicate
5347   // constraint element count.
5348   void sqdecd(const Register& xd,
5349               const Register& wn,
5350               int pattern = SVE_ALL,
5351               int multiplier = 1);
5352 
5353   // Signed saturating decrement scalar by multiple of 64-bit predicate
5354   // constraint element count.
5355   void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5356 
5357   // Signed saturating decrement vector by multiple of 64-bit predicate
5358   // constraint element count.
5359   void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5360 
5361   // Signed saturating decrement scalar by multiple of 16-bit predicate
5362   // constraint element count.
5363   void sqdech(const Register& xd,
5364               const Register& wn,
5365               int pattern = SVE_ALL,
5366               int multiplier = 1);
5367 
5368   // Signed saturating decrement scalar by multiple of 16-bit predicate
5369   // constraint element count.
5370   void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5371 
5372   // Signed saturating decrement vector by multiple of 16-bit predicate
5373   // constraint element count.
5374   void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5375 
5376   // Signed saturating decrement scalar by active predicate element count.
5377   void sqdecp(const Register& xd,
5378               const PRegisterWithLaneSize& pg,
5379               const Register& wn);
5380 
5381   // Signed saturating decrement scalar by active predicate element count.
5382   void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5383 
5384   // Signed saturating decrement vector by active predicate element count.
5385   void sqdecp(const ZRegister& zdn, const PRegister& pg);
5386 
5387   // Signed saturating decrement scalar by multiple of 32-bit predicate
5388   // constraint element count.
5389   void sqdecw(const Register& xd,
5390               const Register& wn,
5391               int pattern = SVE_ALL,
5392               int multiplier = 1);
5393 
5394   // Signed saturating decrement scalar by multiple of 32-bit predicate
5395   // constraint element count.
5396   void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5397 
5398   // Signed saturating decrement vector by multiple of 32-bit predicate
5399   // constraint element count.
5400   void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5401 
5402   // Signed saturating increment scalar by multiple of 8-bit predicate
5403   // constraint element count.
5404   void sqincb(const Register& xd,
5405               const Register& wn,
5406               int pattern = SVE_ALL,
5407               int multiplier = 1);
5408 
5409   // Signed saturating increment scalar by multiple of 8-bit predicate
5410   // constraint element count.
5411   void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5412 
5413   // Signed saturating increment scalar by multiple of 64-bit predicate
5414   // constraint element count.
5415   void sqincd(const Register& xd,
5416               const Register& wn,
5417               int pattern,
5418               int multiplier);
5419 
5420   // Signed saturating increment scalar by multiple of 64-bit predicate
5421   // constraint element count.
5422   void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5423 
5424   // Signed saturating increment vector by multiple of 64-bit predicate
5425   // constraint element count.
5426   void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5427 
5428   // Signed saturating increment scalar by multiple of 16-bit predicate
5429   // constraint element count.
5430   void sqinch(const Register& xd,
5431               const Register& wn,
5432               int pattern = SVE_ALL,
5433               int multiplier = 1);
5434 
5435   // Signed saturating increment scalar by multiple of 16-bit predicate
5436   // constraint element count.
5437   void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5438 
5439   // Signed saturating increment vector by multiple of 16-bit predicate
5440   // constraint element count.
5441   void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5442 
5443   // Signed saturating increment scalar by active predicate element count.
5444   void sqincp(const Register& xd,
5445               const PRegisterWithLaneSize& pg,
5446               const Register& wn);
5447 
5448   // Signed saturating increment scalar by active predicate element count.
5449   void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5450 
5451   // Signed saturating increment vector by active predicate element count.
5452   void sqincp(const ZRegister& zdn, const PRegister& pg);
5453 
5454   // Signed saturating increment scalar by multiple of 32-bit predicate
5455   // constraint element count.
5456   void sqincw(const Register& xd,
5457               const Register& wn,
5458               int pattern = SVE_ALL,
5459               int multiplier = 1);
5460 
5461   // Signed saturating increment scalar by multiple of 32-bit predicate
5462   // constraint element count.
5463   void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5464 
5465   // Signed saturating increment vector by multiple of 32-bit predicate
5466   // constraint element count.
5467   void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5468 
5469   // Signed saturating subtract vectors (unpredicated).
5470   void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5471 
  // Signed saturating subtract immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1, which presumably asks the assembler
  // to derive the shift from `imm8` itself - confirm against the definition.
  void sqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5477 
5478   // Contiguous/scatter store bytes from vector.
5479   void st1b(const ZRegister& zt,
5480             const PRegister& pg,
5481             const SVEMemOperand& addr);
5482 
5483   // Contiguous/scatter store halfwords from vector.
5484   void st1h(const ZRegister& zt,
5485             const PRegister& pg,
5486             const SVEMemOperand& addr);
5487 
5488   // Contiguous/scatter store words from vector.
5489   void st1w(const ZRegister& zt,
5490             const PRegister& pg,
5491             const SVEMemOperand& addr);
5492 
5493   // Contiguous/scatter store doublewords from vector.
5494   void st1d(const ZRegister& zt,
5495             const PRegister& pg,
5496             const SVEMemOperand& addr);
5497 
5498   // Contiguous store two-byte structures from two vectors.
5499   void st2b(const ZRegister& zt1,
5500             const ZRegister& zt2,
5501             const PRegister& pg,
5502             const SVEMemOperand& addr);
5503 
5504   // Contiguous store two-halfword structures from two vectors.
5505   void st2h(const ZRegister& zt1,
5506             const ZRegister& zt2,
5507             const PRegister& pg,
5508             const SVEMemOperand& addr);
5509 
5510   // Contiguous store two-word structures from two vectors.
5511   void st2w(const ZRegister& zt1,
5512             const ZRegister& zt2,
5513             const PRegister& pg,
5514             const SVEMemOperand& addr);
5515 
  // Contiguous store two-doubleword structures from two vectors.
5517   void st2d(const ZRegister& zt1,
5518             const ZRegister& zt2,
5519             const PRegister& pg,
5520             const SVEMemOperand& addr);
5521 
5522   // Contiguous store three-byte structures from three vectors.
5523   void st3b(const ZRegister& zt1,
5524             const ZRegister& zt2,
5525             const ZRegister& zt3,
5526             const PRegister& pg,
5527             const SVEMemOperand& addr);
5528 
5529   // Contiguous store three-halfword structures from three vectors.
5530   void st3h(const ZRegister& zt1,
5531             const ZRegister& zt2,
5532             const ZRegister& zt3,
5533             const PRegister& pg,
5534             const SVEMemOperand& addr);
5535 
5536   // Contiguous store three-word structures from three vectors.
5537   void st3w(const ZRegister& zt1,
5538             const ZRegister& zt2,
5539             const ZRegister& zt3,
5540             const PRegister& pg,
5541             const SVEMemOperand& addr);
5542 
5543   // Contiguous store three-doubleword structures from three vectors.
5544   void st3d(const ZRegister& zt1,
5545             const ZRegister& zt2,
5546             const ZRegister& zt3,
5547             const PRegister& pg,
5548             const SVEMemOperand& addr);
5549 
5550   // Contiguous store four-byte structures from four vectors.
5551   void st4b(const ZRegister& zt1,
5552             const ZRegister& zt2,
5553             const ZRegister& zt3,
5554             const ZRegister& zt4,
5555             const PRegister& pg,
5556             const SVEMemOperand& addr);
5557 
5558   // Contiguous store four-halfword structures from four vectors.
5559   void st4h(const ZRegister& zt1,
5560             const ZRegister& zt2,
5561             const ZRegister& zt3,
5562             const ZRegister& zt4,
5563             const PRegister& pg,
5564             const SVEMemOperand& addr);
5565 
5566   // Contiguous store four-word structures from four vectors.
5567   void st4w(const ZRegister& zt1,
5568             const ZRegister& zt2,
5569             const ZRegister& zt3,
5570             const ZRegister& zt4,
5571             const PRegister& pg,
5572             const SVEMemOperand& addr);
5573 
5574   // Contiguous store four-doubleword structures from four vectors.
5575   void st4d(const ZRegister& zt1,
5576             const ZRegister& zt2,
5577             const ZRegister& zt3,
5578             const ZRegister& zt4,
5579             const PRegister& pg,
5580             const SVEMemOperand& addr);
5581 
5582   // Contiguous store non-temporal bytes from vector.
5583   void stnt1b(const ZRegister& zt,
5584               const PRegister& pg,
5585               const SVEMemOperand& addr);
5586 
5587   // Contiguous store non-temporal halfwords from vector.
5588   void stnt1h(const ZRegister& zt,
5589               const PRegister& pg,
5590               const SVEMemOperand& addr);
5591 
5592   // Contiguous store non-temporal words from vector.
5593   void stnt1w(const ZRegister& zt,
5594               const PRegister& pg,
5595               const SVEMemOperand& addr);
5596 
5597   // Contiguous store non-temporal doublewords from vector.
5598   void stnt1d(const ZRegister& zt,
5599               const PRegister& pg,
5600               const SVEMemOperand& addr);
5601 
5602   // Store SVE predicate/vector register.
5603   void str(const CPURegister& rt, const SVEMemOperand& addr);
5604 
5605   // Subtract vectors (predicated).
5606   void sub(const ZRegister& zd,
5607            const PRegisterM& pg,
5608            const ZRegister& zn,
5609            const ZRegister& zm);
5610 
5611   // Subtract vectors (unpredicated).
5612   void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5613 
  // Subtract immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1, which presumably asks the assembler
  // to derive the shift from `imm8` itself - confirm against the definition.
  void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5616 
5617   // Reversed subtract vectors (predicated).
5618   void subr(const ZRegister& zd,
5619             const PRegisterM& pg,
5620             const ZRegister& zn,
5621             const ZRegister& zm);
5622 
  // Reversed subtract from immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1, which presumably asks the assembler
  // to derive the shift from `imm8` itself - confirm against the definition.
  void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5625 
  // Signed unpack and extend half of vector.
  // NOTE(review): presumably the high half of `zn`, going by the `hi` suffix -
  // confirm against the Arm ISA entry for SUNPKHI.
  void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5628 
  // Signed unpack and extend half of vector.
  // NOTE(review): presumably the low half of `zn`, going by the `lo` suffix -
  // confirm against the Arm ISA entry for SUNPKLO.
  void sunpklo(const ZRegister& zd, const ZRegister& zn);
5631 
5632   // Signed byte extend (predicated).
5633   void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5634 
5635   // Signed halfword extend (predicated).
5636   void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5637 
5638   // Signed word extend (predicated).
5639   void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5640 
5641   // Programmable table lookup/permute using vector of indices into a
5642   // vector.
5643   void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5644 
5645   // Interleave even or odd elements from two predicates.
5646   void trn1(const PRegisterWithLaneSize& pd,
5647             const PRegisterWithLaneSize& pn,
5648             const PRegisterWithLaneSize& pm);
5649 
5650   // Interleave even or odd elements from two vectors.
5651   void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5652 
5653   // Interleave even or odd elements from two predicates.
5654   void trn2(const PRegisterWithLaneSize& pd,
5655             const PRegisterWithLaneSize& pn,
5656             const PRegisterWithLaneSize& pm);
5657 
5658   // Interleave even or odd elements from two vectors.
5659   void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5660 
5661   // Unsigned absolute difference (predicated).
5662   void uabd(const ZRegister& zd,
5663             const PRegisterM& pg,
5664             const ZRegister& zn,
5665             const ZRegister& zm);
5666 
5667   // Unsigned add reduction to scalar.
5668   void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5669 
5670   // Unsigned integer convert to floating-point (predicated).
5671   void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5672 
5673   // Unsigned divide (predicated).
5674   void udiv(const ZRegister& zd,
5675             const PRegisterM& pg,
5676             const ZRegister& zn,
5677             const ZRegister& zm);
5678 
5679   // Unsigned reversed divide (predicated).
5680   void udivr(const ZRegister& zd,
5681              const PRegisterM& pg,
5682              const ZRegister& zn,
5683              const ZRegister& zm);
5684 
5685   // Unsigned dot product by indexed quadtuplet.
5686   void udot(const ZRegister& zda,
5687             const ZRegister& zn,
5688             const ZRegister& zm,
5689             int index);
5690 
5691   // Unsigned dot product.
5692   void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5693 
5694   // Unsigned maximum vectors (predicated).
5695   void umax(const ZRegister& zd,
5696             const PRegisterM& pg,
5697             const ZRegister& zn,
5698             const ZRegister& zm);
5699 
5700   // Unsigned maximum with immediate (unpredicated).
5701   void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5702 
5703   // Unsigned maximum reduction to scalar.
5704   void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5705 
5706   // Unsigned minimum vectors (predicated).
5707   void umin(const ZRegister& zd,
5708             const PRegisterM& pg,
5709             const ZRegister& zn,
5710             const ZRegister& zm);
5711 
5712   // Unsigned minimum with immediate (unpredicated).
5713   void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5714 
5715   // Unsigned minimum reduction to scalar.
5716   void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5717 
5718   // Unsigned multiply returning high half (predicated).
5719   void umulh(const ZRegister& zd,
5720              const PRegisterM& pg,
5721              const ZRegister& zn,
5722              const ZRegister& zm);
5723 
5724   // Unsigned saturating add vectors (unpredicated).
5725   void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5726 
  // Unsigned saturating add immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1, which presumably asks the assembler
  // to derive the shift from `imm8` itself - confirm against the definition.
  void uqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5732 
5733   // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5734   // constraint element count.
5735   void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5736 
5737   // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5738   // constraint element count.
5739   void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5740 
5741   // Unsigned saturating decrement vector by multiple of 64-bit predicate
5742   // constraint element count.
5743   void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5744 
5745   // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5746   // constraint element count.
5747   void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5748 
5749   // Unsigned saturating decrement vector by multiple of 16-bit predicate
5750   // constraint element count.
5751   void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5752 
5753   // Unsigned saturating decrement scalar by active predicate element count.
5754   void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5755 
5756   // Unsigned saturating decrement vector by active predicate element count.
5757   void uqdecp(const ZRegister& zdn, const PRegister& pg);
5758 
5759   // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5760   // constraint element count.
5761   void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5762 
5763   // Unsigned saturating decrement vector by multiple of 32-bit predicate
5764   // constraint element count.
5765   void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5766 
5767   // Unsigned saturating increment scalar by multiple of 8-bit predicate
5768   // constraint element count.
5769   void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5770 
5771   // Unsigned saturating increment scalar by multiple of 64-bit predicate
5772   // constraint element count.
5773   void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5774 
5775   // Unsigned saturating increment vector by multiple of 64-bit predicate
5776   // constraint element count.
5777   void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5778 
5779   // Unsigned saturating increment scalar by multiple of 16-bit predicate
5780   // constraint element count.
5781   void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5782 
5783   // Unsigned saturating increment vector by multiple of 16-bit predicate
5784   // constraint element count.
5785   void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5786 
5787   // Unsigned saturating increment scalar by active predicate element count.
5788   void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5789 
5790   // Unsigned saturating increment vector by active predicate element count.
5791   void uqincp(const ZRegister& zdn, const PRegister& pg);
5792 
5793   // Unsigned saturating increment scalar by multiple of 32-bit predicate
5794   // constraint element count.
5795   void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5796 
5797   // Unsigned saturating increment vector by multiple of 32-bit predicate
5798   // constraint element count.
5799   void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5800 
5801   // Unsigned saturating subtract vectors (unpredicated).
5802   void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5803 
  // Unsigned saturating subtract immediate (unpredicated).
  // NOTE(review): `shift` defaults to -1, which presumably asks the assembler
  // to derive the shift from `imm8` itself - confirm against the definition.
  void uqsub(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5809 
  // Unsigned unpack and extend half of vector.
  // NOTE(review): presumably the high half of `zn`, going by the `hi` suffix -
  // confirm against the Arm ISA entry for UUNPKHI.
  void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5812 
  // Unsigned unpack and extend half of vector.
  // NOTE(review): presumably the low half of `zn`, going by the `lo` suffix -
  // confirm against the Arm ISA entry for UUNPKLO.
  void uunpklo(const ZRegister& zd, const ZRegister& zn);
5815 
5816   // Unsigned byte extend (predicated).
5817   void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5818 
5819   // Unsigned halfword extend (predicated).
5820   void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5821 
5822   // Unsigned word extend (predicated).
5823   void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5824 
5825   // Concatenate even or odd elements from two predicates.
5826   void uzp1(const PRegisterWithLaneSize& pd,
5827             const PRegisterWithLaneSize& pn,
5828             const PRegisterWithLaneSize& pm);
5829 
5830   // Concatenate even or odd elements from two vectors.
5831   void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5832 
5833   // Concatenate even or odd elements from two predicates.
5834   void uzp2(const PRegisterWithLaneSize& pd,
5835             const PRegisterWithLaneSize& pn,
5836             const PRegisterWithLaneSize& pm);
5837 
5838   // Concatenate even or odd elements from two vectors.
5839   void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5840 
5841   // While incrementing signed scalar less than or equal to scalar.
5842   void whilele(const PRegisterWithLaneSize& pd,
5843                const Register& rn,
5844                const Register& rm);
5845 
5846   // While incrementing unsigned scalar lower than scalar.
5847   void whilelo(const PRegisterWithLaneSize& pd,
5848                const Register& rn,
5849                const Register& rm);
5850 
5851   // While incrementing unsigned scalar lower or same as scalar.
5852   void whilels(const PRegisterWithLaneSize& pd,
5853                const Register& rn,
5854                const Register& rm);
5855 
5856   // While incrementing signed scalar less than scalar.
5857   void whilelt(const PRegisterWithLaneSize& pd,
5858                const Register& rn,
5859                const Register& rm);
5860 
5861   // Write the first-fault register.
5862   void wrffr(const PRegisterWithLaneSize& pn);
5863 
5864   // Interleave elements from two half predicates.
5865   void zip1(const PRegisterWithLaneSize& pd,
5866             const PRegisterWithLaneSize& pn,
5867             const PRegisterWithLaneSize& pm);
5868 
5869   // Interleave elements from two half vectors.
5870   void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5871 
5872   // Interleave elements from two half predicates.
5873   void zip2(const PRegisterWithLaneSize& pd,
5874             const PRegisterWithLaneSize& pn,
5875             const PRegisterWithLaneSize& pm);
5876 
5877   // Interleave elements from two half vectors.
5878   void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5879 
5880   // Add with carry long (bottom).
5881   void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5882 
5883   // Add with carry long (top).
5884   void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5885 
5886   // Add narrow high part (bottom).
5887   void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5888 
5889   // Add narrow high part (top).
5890   void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5891 
5892   // Add pairwise.
5893   void addp(const ZRegister& zd,
5894             const PRegisterM& pg,
5895             const ZRegister& zn,
5896             const ZRegister& zm);
5897 
5898   // Bitwise clear and exclusive OR.
5899   void bcax(const ZRegister& zd,
5900             const ZRegister& zn,
5901             const ZRegister& zm,
5902             const ZRegister& zk);
5903 
5904   // Scatter lower bits into positions selected by bitmask.
5905   void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5906 
5907   // Gather lower bits from positions selected by bitmask.
5908   void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5909 
5910   // Group bits to right or left as selected by bitmask.
5911   void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5912 
5913   // Bitwise select.
5914   void bsl(const ZRegister& zd,
5915            const ZRegister& zn,
5916            const ZRegister& zm,
5917            const ZRegister& zk);
5918 
5919   // Bitwise select with first input inverted.
5920   void bsl1n(const ZRegister& zd,
5921              const ZRegister& zn,
5922              const ZRegister& zm,
5923              const ZRegister& zk);
5924 
5925   // Bitwise select with second input inverted.
5926   void bsl2n(const ZRegister& zd,
5927              const ZRegister& zn,
5928              const ZRegister& zm,
5929              const ZRegister& zk);
5930 
5931   // Complex integer add with rotate.
5932   void cadd(const ZRegister& zd,
5933             const ZRegister& zn,
5934             const ZRegister& zm,
5935             int rot);
5936 
5937   // Complex integer dot product (indexed).
5938   void cdot(const ZRegister& zda,
5939             const ZRegister& zn,
5940             const ZRegister& zm,
5941             int index,
5942             int rot);
5943 
5944   // Complex integer dot product.
5945   void cdot(const ZRegister& zda,
5946             const ZRegister& zn,
5947             const ZRegister& zm,
5948             int rot);
5949 
5950   // Complex integer multiply-add with rotate (indexed).
5951   void cmla(const ZRegister& zda,
5952             const ZRegister& zn,
5953             const ZRegister& zm,
5954             int index,
5955             int rot);
5956 
5957   // Complex integer multiply-add with rotate.
5958   void cmla(const ZRegister& zda,
5959             const ZRegister& zn,
5960             const ZRegister& zm,
5961             int rot);
5962 
5963   // Bitwise exclusive OR of three vectors.
5964   void eor3(const ZRegister& zd,
5965             const ZRegister& zn,
5966             const ZRegister& zm,
5967             const ZRegister& zk);
5968 
5969   // Interleaving exclusive OR (bottom, top).
5970   void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5971 
5972   // Interleaving exclusive OR (top, bottom).
5973   void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5974 
5975   // Floating-point add pairwise.
5976   void faddp(const ZRegister& zd,
5977              const PRegisterM& pg,
5978              const ZRegister& zn,
5979              const ZRegister& zm);
5980 
5981   // Floating-point up convert long (top, predicated).
5982   void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5983 
5984   // Floating-point down convert and narrow (top, predicated).
5985   void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5986 
5987   // Floating-point down convert, rounding to odd (predicated).
5988   void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5989 
5990   // Floating-point down convert, rounding to odd (top, predicated).
5991   void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5992 
5993   // Floating-point base 2 logarithm as integer.
5994   void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5995 
5996   // Floating-point maximum number pairwise.
5997   void fmaxnmp(const ZRegister& zd,
5998                const PRegisterM& pg,
5999                const ZRegister& zn,
6000                const ZRegister& zm);
6001 
6002   // Floating-point maximum pairwise.
6003   void fmaxp(const ZRegister& zd,
6004              const PRegisterM& pg,
6005              const ZRegister& zn,
6006              const ZRegister& zm);
6007 
6008   // Floating-point minimum number pairwise.
6009   void fminnmp(const ZRegister& zd,
6010                const PRegisterM& pg,
6011                const ZRegister& zn,
6012                const ZRegister& zm);
6013 
6014   // Floating-point minimum pairwise.
6015   void fminp(const ZRegister& zd,
6016              const PRegisterM& pg,
6017              const ZRegister& zn,
6018              const ZRegister& zm);
6019 
6020   // Half-precision floating-point multiply-add long to single-precision
6021   // (bottom).
6022   void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6023 
6024   // Half-precision floating-point multiply-add long to single-precision
6025   // (top).
6026   void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6027 
6028   // Half-precision floating-point multiply-subtract long from
6029   // single-precision (bottom).
6030   void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6031 
6032   // Half-precision floating-point multiply-subtract long from
6033   // single-precision (top, indexed).
6034   void fmlslt(const ZRegister& zda,
6035               const ZRegister& zn,
6036               const ZRegister& zm,
6037               int index);
6038 
6039   // Half-precision floating-point multiply-add long to single-precision
6040   // (bottom, indexed).
6041   void fmlalb(const ZRegister& zda,
6042               const ZRegister& zn,
6043               const ZRegister& zm,
6044               int index);
6045 
6046   // Half-precision floating-point multiply-add long to single-precision
6047   // (top, indexed).
6048   void fmlalt(const ZRegister& zda,
6049               const ZRegister& zn,
6050               const ZRegister& zm,
6051               int index);
6052 
6053   // Half-precision floating-point multiply-subtract long from
6054   // single-precision (bottom, indexed).
6055   void fmlslb(const ZRegister& zda,
6056               const ZRegister& zn,
6057               const ZRegister& zm,
6058               int index);
6059 
6060   // Half-precision floating-point multiply-subtract long from
6061   // single-precision (top).
6062   void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6063 
6064   // Count matching elements in vector.
6065   void histcnt(const ZRegister& zd,
6066                const PRegisterZ& pg,
6067                const ZRegister& zn,
6068                const ZRegister& zm);
6069 
6070   // Count matching elements in vector segments.
6071   void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6072 
6073   // Gather load non-temporal signed bytes.
6074   void ldnt1sb(const ZRegister& zt,
6075                const PRegisterZ& pg,
6076                const SVEMemOperand& addr);
6077 
6078   // Gather load non-temporal signed halfwords.
6079   void ldnt1sh(const ZRegister& zt,
6080                const PRegisterZ& pg,
6081                const SVEMemOperand& addr);
6082 
6083   // Gather load non-temporal signed words.
6084   void ldnt1sw(const ZRegister& zt,
6085                const PRegisterZ& pg,
6086                const SVEMemOperand& addr);
6087 
6088   // Detect any matching elements, setting the condition flags.
6089   void match(const PRegisterWithLaneSize& pd,
6090              const PRegisterZ& pg,
6091              const ZRegister& zn,
6092              const ZRegister& zm);
6093 
6094   // Multiply-add to accumulator (indexed).
6095   void mla(const ZRegister& zda,
6096            const ZRegister& zn,
6097            const ZRegister& zm,
6098            int index);
6099 
6100   // Multiply-subtract from accumulator (indexed).
6101   void mls(const ZRegister& zda,
6102            const ZRegister& zn,
6103            const ZRegister& zm,
6104            int index);
6105 
6106   // Multiply (indexed).
6107   void mul(const ZRegister& zd,
6108            const ZRegister& zn,
6109            const ZRegister& zm,
6110            int index);
6111 
6112   // Multiply vectors (unpredicated).
6113   void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6114 
6115   // Bitwise inverted select.
6116   void nbsl(const ZRegister& zd,
6117             const ZRegister& zn,
6118             const ZRegister& zm,
6119             const ZRegister& zk);
6120 
6121   // Detect no matching elements, setting the condition flags.
6122   void nmatch(const PRegisterWithLaneSize& pd,
6123               const PRegisterZ& pg,
6124               const ZRegister& zn,
6125               const ZRegister& zm);
6126 
6127   // Polynomial multiply vectors (unpredicated).
6128   void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6129 
6130   // Polynomial multiply long (bottom).
6131   void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6132 
6133   // Polynomial multiply long (top).
6134   void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6135 
6136   // Rounding add narrow high part (bottom).
6137   void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6138 
6139   // Rounding add narrow high part (top).
6140   void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6141 
6142   // Rounding shift right narrow by immediate (bottom).
6143   void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6144 
6145   // Rounding shift right narrow by immediate (top).
6146   void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6147 
6148   // Rounding subtract narrow high part (bottom).
6149   void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6150 
6151   // Rounding subtract narrow high part (top).
6152   void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6153 
6154   // Signed absolute difference and accumulate.
6155   void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6156 
6157   // Signed absolute difference and accumulate long (bottom).
6158   void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6159 
6160   // Signed absolute difference and accumulate long (top).
6161   void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6162 
6163   // Signed absolute difference long (bottom).
6164   void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6165 
6166   // Signed absolute difference long (top).
6167   void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6168 
6169   // Signed add and accumulate long pairwise.
6170   void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6171 
6172   // Signed add long (bottom).
6173   void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6174 
6175   // Signed add long (bottom + top).
6176   void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6177 
6178   // Signed add long (top).
6179   void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6180 
6181   // Signed add wide (bottom).
6182   void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6183 
6184   // Signed add wide (top).
6185   void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6186 
6187   // Subtract with carry long (bottom).
6188   void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6189 
6190   // Subtract with carry long (top).
6191   void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6192 
6193   // Signed halving addition.
6194   void shadd(const ZRegister& zd,
6195              const PRegisterM& pg,
6196              const ZRegister& zn,
6197              const ZRegister& zm);
6198 
6199   // Shift right narrow by immediate (bottom).
6200   void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6201 
6202   // Shift right narrow by immediate (top).
6203   void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6204 
6205   // Signed halving subtract.
6206   void shsub(const ZRegister& zd,
6207              const PRegisterM& pg,
6208              const ZRegister& zn,
6209              const ZRegister& zm);
6210 
6211   // Signed halving subtract reversed vectors.
6212   void shsubr(const ZRegister& zd,
6213               const PRegisterM& pg,
6214               const ZRegister& zn,
6215               const ZRegister& zm);
6216 
6217   // Shift left and insert (immediate).
6218   void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6219 
6220   // Signed maximum pairwise.
6221   void smaxp(const ZRegister& zd,
6222              const PRegisterM& pg,
6223              const ZRegister& zn,
6224              const ZRegister& zm);
6225 
6226   // Signed minimum pairwise.
6227   void sminp(const ZRegister& zd,
6228              const PRegisterM& pg,
6229              const ZRegister& zn,
6230              const ZRegister& zm);
6231 
6232   // Signed multiply-add long to accumulator (bottom, indexed).
6233   void smlalb(const ZRegister& zda,
6234               const ZRegister& zn,
6235               const ZRegister& zm,
6236               int index);
6237 
6238   // Signed multiply-add long to accumulator (bottom).
6239   void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6240 
6241   // Signed multiply-add long to accumulator (top, indexed).
6242   void smlalt(const ZRegister& zda,
6243               const ZRegister& zn,
6244               const ZRegister& zm,
6245               int index);
6246 
6247   // Signed multiply-add long to accumulator (top).
6248   void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6249 
6250   // Signed multiply-subtract long from accumulator (bottom, indexed).
6251   void smlslb(const ZRegister& zda,
6252               const ZRegister& zn,
6253               const ZRegister& zm,
6254               int index);
6255 
6256   // Signed multiply-subtract long from accumulator (bottom).
6257   void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6258 
6259   // Signed multiply-subtract long from accumulator (top, indexed).
6260   void smlslt(const ZRegister& zda,
6261               const ZRegister& zn,
6262               const ZRegister& zm,
6263               int index);
6264 
6265   // Signed multiply-subtract long from accumulator (top).
6266   void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6267 
6268   // Signed multiply returning high half (unpredicated).
6269   void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6270 
6271   // Signed multiply long (bottom, indexed).
6272   void smullb(const ZRegister& zd,
6273               const ZRegister& zn,
6274               const ZRegister& zm,
6275               int index);
6276 
6277   // Signed multiply long (bottom).
6278   void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6279 
6280   // Signed multiply long (top, indexed).
6281   void smullt(const ZRegister& zd,
6282               const ZRegister& zn,
6283               const ZRegister& zm,
6284               int index);
6285 
6286   // Signed multiply long (top).
6287   void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6288 
6289   // Signed saturating absolute value.
6290   void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6291 
6292   // Signed saturating addition (predicated).
6293   void sqadd(const ZRegister& zd,
6294              const PRegisterM& pg,
6295              const ZRegister& zn,
6296              const ZRegister& zm);
6297 
6298   // Saturating complex integer add with rotate.
6299   void sqcadd(const ZRegister& zd,
6300               const ZRegister& zn,
6301               const ZRegister& zm,
6302               int rot);
6303 
6304   // Signed saturating doubling multiply-add long to accumulator (bottom,
6305   // indexed).
6306   void sqdmlalb(const ZRegister& zda,
6307                 const ZRegister& zn,
6308                 const ZRegister& zm,
6309                 int index);
6310 
6311   // Signed saturating doubling multiply-add long to accumulator (bottom).
6312   void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6313 
6314   // Signed saturating doubling multiply-add long to accumulator (bottom x
6315   // top).
6316   void sqdmlalbt(const ZRegister& zda,
6317                  const ZRegister& zn,
6318                  const ZRegister& zm);
6319 
6320   // Signed saturating doubling multiply-add long to accumulator (top,
6321   // indexed).
6322   void sqdmlalt(const ZRegister& zda,
6323                 const ZRegister& zn,
6324                 const ZRegister& zm,
6325                 int index);
6326 
6327   // Signed saturating doubling multiply-add long to accumulator (top).
6328   void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6329 
6330   // Signed saturating doubling multiply-subtract long from accumulator
6331   // (bottom, indexed).
6332   void sqdmlslb(const ZRegister& zda,
6333                 const ZRegister& zn,
6334                 const ZRegister& zm,
6335                 int index);
6336 
6337   // Signed saturating doubling multiply-subtract long from accumulator
6338   // (bottom).
6339   void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6340 
6341   // Signed saturating doubling multiply-subtract long from accumulator
6342   // (bottom x top).
6343   void sqdmlslbt(const ZRegister& zda,
6344                  const ZRegister& zn,
6345                  const ZRegister& zm);
6346 
6347   // Signed saturating doubling multiply-subtract long from accumulator
6348   // (top, indexed).
6349   void sqdmlslt(const ZRegister& zda,
6350                 const ZRegister& zn,
6351                 const ZRegister& zm,
6352                 int index);
6353 
6354   // Signed saturating doubling multiply-subtract long from accumulator
6355   // (top).
6356   void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6357 
6358   // Signed saturating doubling multiply high (indexed).
6359   void sqdmulh(const ZRegister& zd,
6360                const ZRegister& zn,
6361                const ZRegister& zm,
6362                int index);
6363 
6364   // Signed saturating doubling multiply high (unpredicated).
6365   void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6366 
6367   // Signed saturating doubling multiply long (bottom, indexed).
6368   void sqdmullb(const ZRegister& zd,
6369                 const ZRegister& zn,
6370                 const ZRegister& zm,
6371                 int index);
6372 
6373   // Signed saturating doubling multiply long (bottom).
6374   void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6375 
6376   // Signed saturating doubling multiply long (top, indexed).
6377   void sqdmullt(const ZRegister& zd,
6378                 const ZRegister& zn,
6379                 const ZRegister& zm,
6380                 int index);
6381 
6382   // Signed saturating doubling multiply long (top).
6383   void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6384 
6385   // Signed saturating negate.
6386   void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6387 
6388   // Saturating rounding doubling complex integer multiply-add high with
6389   // rotate (indexed).
6390   void sqrdcmlah(const ZRegister& zda,
6391                  const ZRegister& zn,
6392                  const ZRegister& zm,
6393                  int index,
6394                  int rot);
6395 
6396   // Saturating rounding doubling complex integer multiply-add high with
6397   // rotate.
6398   void sqrdcmlah(const ZRegister& zda,
6399                  const ZRegister& zn,
6400                  const ZRegister& zm,
6401                  int rot);
6402 
6403   // Signed saturating rounding doubling multiply-add high to accumulator
6404   // (indexed).
6405   void sqrdmlah(const ZRegister& zda,
6406                 const ZRegister& zn,
6407                 const ZRegister& zm,
6408                 int index);
6409 
6410   // Signed saturating rounding doubling multiply-add high to accumulator
6411   // (unpredicated).
6412   void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6413 
6414   // Signed saturating rounding doubling multiply-subtract high from
6415   // accumulator (indexed).
6416   void sqrdmlsh(const ZRegister& zda,
6417                 const ZRegister& zn,
6418                 const ZRegister& zm,
6419                 int index);
6420 
6421   // Signed saturating rounding doubling multiply-subtract high from
6422   // accumulator (unpredicated).
6423   void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6424 
6425   // Signed saturating rounding doubling multiply high (indexed).
6426   void sqrdmulh(const ZRegister& zd,
6427                 const ZRegister& zn,
6428                 const ZRegister& zm,
6429                 int index);
6430 
6431   // Signed saturating rounding doubling multiply high (unpredicated).
6432   void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6433 
6434   // Signed saturating rounding shift left by vector (predicated).
6435   void sqrshl(const ZRegister& zd,
6436               const PRegisterM& pg,
6437               const ZRegister& zn,
6438               const ZRegister& zm);
6439 
6440   // Signed saturating rounding shift left reversed vectors (predicated).
6441   void sqrshlr(const ZRegister& zd,
6442                const PRegisterM& pg,
6443                const ZRegister& zn,
6444                const ZRegister& zm);
6445 
6446   // Signed saturating rounding shift right narrow by immediate (bottom).
6447   void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6448 
6449   // Signed saturating rounding shift right narrow by immediate (top).
6450   void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6451 
6452   // Signed saturating rounding shift right unsigned narrow by immediate
6453   // (bottom).
6454   void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6455 
6456   // Signed saturating rounding shift right unsigned narrow by immediate
6457   // (top).
6458   void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6459 
6460   // Signed saturating shift left by immediate.
6461   void sqshl(const ZRegister& zd,
6462              const PRegisterM& pg,
6463              const ZRegister& zn,
6464              int shift);
6465 
6466   // Signed saturating shift left by vector (predicated).
6467   void sqshl(const ZRegister& zd,
6468              const PRegisterM& pg,
6469              const ZRegister& zn,
6470              const ZRegister& zm);
6471 
6472   // Signed saturating shift left reversed vectors (predicated).
6473   void sqshlr(const ZRegister& zd,
6474               const PRegisterM& pg,
6475               const ZRegister& zn,
6476               const ZRegister& zm);
6477 
6478   // Signed saturating shift left unsigned by immediate.
6479   void sqshlu(const ZRegister& zd,
6480               const PRegisterM& pg,
6481               const ZRegister& zn,
6482               int shift);
6483 
6484   // Signed saturating shift right narrow by immediate (bottom).
6485   void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6486 
6487   // Signed saturating shift right narrow by immediate (top).
6488   void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6489 
6490   // Signed saturating shift right unsigned narrow by immediate (bottom).
6491   void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6492 
6493   // Signed saturating shift right unsigned narrow by immediate (top).
6494   void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6495 
6496   // Signed saturating subtraction (predicated).
6497   void sqsub(const ZRegister& zd,
6498              const PRegisterM& pg,
6499              const ZRegister& zn,
6500              const ZRegister& zm);
6501 
6502   // Signed saturating subtraction reversed vectors (predicated).
6503   void sqsubr(const ZRegister& zd,
6504               const PRegisterM& pg,
6505               const ZRegister& zn,
6506               const ZRegister& zm);
6507 
6508   // Signed saturating extract narrow (bottom).
6509   void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6510 
6511   // Signed saturating extract narrow (top).
6512   void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6513 
6514   // Signed saturating unsigned extract narrow (bottom).
6515   void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6516 
6517   // Signed saturating unsigned extract narrow (top).
6518   void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6519 
6520   // Signed rounding halving addition.
6521   void srhadd(const ZRegister& zd,
6522               const PRegisterM& pg,
6523               const ZRegister& zn,
6524               const ZRegister& zm);
6525 
6526   // Shift right and insert (immediate).
6527   void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6528 
6529   // Signed rounding shift left by vector (predicated).
6530   void srshl(const ZRegister& zd,
6531              const PRegisterM& pg,
6532              const ZRegister& zn,
6533              const ZRegister& zm);
6534 
6535   // Signed rounding shift left reversed vectors (predicated).
6536   void srshlr(const ZRegister& zd,
6537               const PRegisterM& pg,
6538               const ZRegister& zn,
6539               const ZRegister& zm);
6540 
6541   // Signed rounding shift right by immediate.
6542   void srshr(const ZRegister& zd,
6543              const PRegisterM& pg,
6544              const ZRegister& zn,
6545              int shift);
6546 
6547   // Signed rounding shift right and accumulate (immediate).
6548   void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6549 
6550   // Signed shift left long by immediate (bottom).
6551   void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6552 
6553   // Signed shift left long by immediate (top).
6554   void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6555 
6556   // Signed shift right and accumulate (immediate).
6557   void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6558 
6559   // Signed subtract long (bottom).
6560   void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6561 
6562   // Signed subtract long (bottom - top).
6563   void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6564 
6565   // Signed subtract long (top).
6566   void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6567 
6568   // Signed subtract long (top - bottom).
6569   void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6570 
6571   // Signed subtract wide (bottom).
6572   void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6573 
6574   // Signed subtract wide (top).
6575   void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6576 
6577   // Subtract narrow high part (bottom).
6578   void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6579 
6580   // Subtract narrow high part (top).
6581   void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6582 
6583   // Signed saturating addition of unsigned value.
6584   void suqadd(const ZRegister& zd,
6585               const PRegisterM& pg,
6586               const ZRegister& zn,
6587               const ZRegister& zm);
6588 
6589   // Programmable table lookup in one or two vector table (zeroing).
6590   void tbl(const ZRegister& zd,
6591            const ZRegister& zn1,
6592            const ZRegister& zn2,
6593            const ZRegister& zm);
6594 
6595   // Programmable table lookup in single vector table (merging).
6596   void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6597 
6598   // Unsigned absolute difference and accumulate.
6599   void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6600 
6601   // Unsigned absolute difference and accumulate long (bottom).
6602   void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6603 
6604   // Unsigned absolute difference and accumulate long (top).
6605   void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6606 
6607   // Unsigned absolute difference long (bottom).
6608   void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6609 
6610   // Unsigned absolute difference long (top).
6611   void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6612 
6613   // Unsigned add and accumulate long pairwise.
6614   void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6615 
6616   // Unsigned add long (bottom).
6617   void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6618 
6619   // Unsigned add long (top).
6620   void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6621 
6622   // Unsigned add wide (bottom).
6623   void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6624 
6625   // Unsigned add wide (top).
6626   void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6627 
6628   // Unsigned halving addition.
6629   void uhadd(const ZRegister& zd,
6630              const PRegisterM& pg,
6631              const ZRegister& zn,
6632              const ZRegister& zm);
6633 
6634   // Unsigned halving subtract.
6635   void uhsub(const ZRegister& zd,
6636              const PRegisterM& pg,
6637              const ZRegister& zn,
6638              const ZRegister& zm);
6639 
6640   // Unsigned halving subtract reversed vectors.
6641   void uhsubr(const ZRegister& zd,
6642               const PRegisterM& pg,
6643               const ZRegister& zn,
6644               const ZRegister& zm);
6645 
6646   // Unsigned maximum pairwise.
6647   void umaxp(const ZRegister& zd,
6648              const PRegisterM& pg,
6649              const ZRegister& zn,
6650              const ZRegister& zm);
6651 
6652   // Unsigned minimum pairwise.
6653   void uminp(const ZRegister& zd,
6654              const PRegisterM& pg,
6655              const ZRegister& zn,
6656              const ZRegister& zm);
6657 
6658   // Unsigned multiply-add long to accumulator (bottom, indexed).
6659   void umlalb(const ZRegister& zda,
6660               const ZRegister& zn,
6661               const ZRegister& zm,
6662               int index);
6663 
6664   // Unsigned multiply-add long to accumulator (bottom).
6665   void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6666 
6667   // Unsigned multiply-add long to accumulator (top, indexed).
6668   void umlalt(const ZRegister& zda,
6669               const ZRegister& zn,
6670               const ZRegister& zm,
6671               int index);
6672 
6673   // Unsigned multiply-add long to accumulator (top).
6674   void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6675 
6676   // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6677   void umlslb(const ZRegister& zda,
6678               const ZRegister& zn,
6679               const ZRegister& zm,
6680               int index);
6681 
6682   // Unsigned multiply-subtract long from accumulator (bottom).
6683   void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6684 
6685   // Unsigned multiply-subtract long from accumulator (top, indexed).
6686   void umlslt(const ZRegister& zda,
6687               const ZRegister& zn,
6688               const ZRegister& zm,
6689               int index);
6690 
6691   // Unsigned multiply-subtract long from accumulator (top).
6692   void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6693 
6694   // Unsigned multiply returning high half (unpredicated).
6695   void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6696 
6697   // Unsigned multiply long (bottom, indexed).
6698   void umullb(const ZRegister& zd,
6699               const ZRegister& zn,
6700               const ZRegister& zm,
6701               int index);
6702 
6703   // Unsigned multiply long (bottom).
6704   void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6705 
6706   // Unsigned multiply long (top, indexed).
6707   void umullt(const ZRegister& zd,
6708               const ZRegister& zn,
6709               const ZRegister& zm,
6710               int index);
6711 
6712   // Unsigned multiply long (top).
6713   void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6714 
6715   // Unsigned saturating addition (predicated).
6716   void uqadd(const ZRegister& zd,
6717              const PRegisterM& pg,
6718              const ZRegister& zn,
6719              const ZRegister& zm);
6720 
6721   // Unsigned saturating rounding shift left by vector (predicated).
6722   void uqrshl(const ZRegister& zd,
6723               const PRegisterM& pg,
6724               const ZRegister& zn,
6725               const ZRegister& zm);
6726 
6727   // Unsigned saturating rounding shift left reversed vectors (predicated).
6728   void uqrshlr(const ZRegister& zd,
6729                const PRegisterM& pg,
6730                const ZRegister& zn,
6731                const ZRegister& zm);
6732 
6733   // Unsigned saturating rounding shift right narrow by immediate (bottom).
6734   void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6735 
6736   // Unsigned saturating rounding shift right narrow by immediate (top).
6737   void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6738 
6739   // Unsigned saturating shift left by immediate.
6740   void uqshl(const ZRegister& zd,
6741              const PRegisterM& pg,
6742              const ZRegister& zn,
6743              int shift);
6744 
6745   // Unsigned saturating shift left by vector (predicated).
6746   void uqshl(const ZRegister& zd,
6747              const PRegisterM& pg,
6748              const ZRegister& zn,
6749              const ZRegister& zm);
6750 
6751   // Unsigned saturating shift left reversed vectors (predicated).
6752   void uqshlr(const ZRegister& zd,
6753               const PRegisterM& pg,
6754               const ZRegister& zn,
6755               const ZRegister& zm);
6756 
6757   // Unsigned saturating shift right narrow by immediate (bottom).
6758   void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6759 
6760   // Unsigned saturating shift right narrow by immediate (top).
6761   void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6762 
6763   // Unsigned saturating subtraction (predicated).
6764   void uqsub(const ZRegister& zd,
6765              const PRegisterM& pg,
6766              const ZRegister& zn,
6767              const ZRegister& zm);
6768 
6769   // Unsigned saturating subtraction reversed vectors (predicated).
6770   void uqsubr(const ZRegister& zd,
6771               const PRegisterM& pg,
6772               const ZRegister& zn,
6773               const ZRegister& zm);
6774 
6775   // Unsigned saturating extract narrow (bottom).
6776   void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6777 
6778   // Unsigned saturating extract narrow (top).
6779   void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6780 
6781   // Unsigned reciprocal estimate (predicated).
6782   void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6783 
6784   // Unsigned rounding halving addition.
6785   void urhadd(const ZRegister& zd,
6786               const PRegisterM& pg,
6787               const ZRegister& zn,
6788               const ZRegister& zm);
6789 
6790   // Unsigned rounding shift left by vector (predicated).
6791   void urshl(const ZRegister& zd,
6792              const PRegisterM& pg,
6793              const ZRegister& zn,
6794              const ZRegister& zm);
6795 
6796   // Unsigned rounding shift left reversed vectors (predicated).
6797   void urshlr(const ZRegister& zd,
6798               const PRegisterM& pg,
6799               const ZRegister& zn,
6800               const ZRegister& zm);
6801 
6802   // Unsigned rounding shift right by immediate.
6803   void urshr(const ZRegister& zd,
6804              const PRegisterM& pg,
6805              const ZRegister& zn,
6806              int shift);
6807 
6808   // Unsigned reciprocal square root estimate (predicated).
6809   void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6810 
6811   // Unsigned rounding shift right and accumulate (immediate).
6812   void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6813 
6814   // Unsigned shift left long by immediate (bottom).
6815   void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6816 
6817   // Unsigned shift left long by immediate (top).
6818   void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6819 
6820   // Unsigned saturating addition of signed value.
6821   void usqadd(const ZRegister& zd,
6822               const PRegisterM& pg,
6823               const ZRegister& zn,
6824               const ZRegister& zm);
6825 
6826   // Unsigned shift right and accumulate (immediate).
6827   void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6828 
6829   // Unsigned subtract long (bottom).
6830   void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6831 
6832   // Unsigned subtract long (top).
6833   void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6834 
6835   // Unsigned subtract wide (bottom).
6836   void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6837 
6838   // Unsigned subtract wide (top).
6839   void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6840 
6841   // While decrementing signed scalar greater than or equal to scalar.
6842   void whilege(const PRegisterWithLaneSize& pd,
6843                const Register& rn,
6844                const Register& rm);
6845 
6846   // While decrementing signed scalar greater than scalar.
6847   void whilegt(const PRegisterWithLaneSize& pd,
6848                const Register& rn,
6849                const Register& rm);
6850 
6851   // While decrementing unsigned scalar higher than scalar.
6852   void whilehi(const PRegisterWithLaneSize& pd,
6853                const Register& rn,
6854                const Register& rm);
6855 
6856   // While decrementing unsigned scalar higher or same as scalar.
6857   void whilehs(const PRegisterWithLaneSize& pd,
6858                const Register& rn,
6859                const Register& rm);
6860 
6861   // While free of read-after-write conflicts.
6862   void whilerw(const PRegisterWithLaneSize& pd,
6863                const Register& rn,
6864                const Register& rm);
6865 
6866   // While free of write-after-read/write conflicts.
6867   void whilewr(const PRegisterWithLaneSize& pd,
6868                const Register& rn,
6869                const Register& rm);
6870 
6871   // Bitwise exclusive OR and rotate right by immediate.
6872   void xar(const ZRegister& zd,
6873            const ZRegister& zn,
6874            const ZRegister& zm,
6875            int shift);
6876 
6877   // Floating-point matrix multiply-accumulate.
6878   void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6879 
6880   // Signed integer matrix multiply-accumulate.
6881   void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6882 
6883   // Unsigned by signed integer matrix multiply-accumulate.
6884   void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6885 
6886   // Unsigned integer matrix multiply-accumulate.
6887   void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6888 
6889   // Unsigned by signed integer dot product.
6890   void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6891 
6892   // Unsigned by signed integer indexed dot product.
6893   void usdot(const ZRegister& zda,
6894              const ZRegister& zn,
6895              const ZRegister& zm,
6896              int index);
6897 
6898   // Signed by unsigned integer indexed dot product.
6899   void sudot(const ZRegister& zda,
6900              const ZRegister& zn,
6901              const ZRegister& zm,
6902              int index);
6903 
6904   // Emit generic instructions.
6905 
6906   // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)6907   void dci(Instr raw_inst) { Emit(raw_inst); }
6908 
6909   // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)6910   void dc32(uint32_t data) { dc(data); }
6911 
6912   // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)6913   void dc64(uint64_t data) { dc(data); }
6914 
6915   // Emit data in the instruction stream.
6916   template <typename T>
dc(T data)6917   void dc(T data) {
6918     VIXL_ASSERT(AllowAssembler());
6919     GetBuffer()->Emit<T>(data);
6920   }
6921 
  // Copy a string into the instruction stream, including the terminating NUL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
EmitString(const char * string)6925   void EmitString(const char* string) {
6926     VIXL_ASSERT(string != NULL);
6927     VIXL_ASSERT(AllowAssembler());
6928 
6929     GetBuffer()->EmitString(string);
6930     GetBuffer()->Align();
6931   }
6932 
6933   // Code generation helpers.
6934   static bool OneInstrMoveImmediateHelper(Assembler* assm,
6935                                           const Register& dst,
6936                                           uint64_t imm);
6937 
6938   // Register encoding.
6939   template <int hibit, int lobit>
Rx(CPURegister rx)6940   static Instr Rx(CPURegister rx) {
6941     VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
6942     return ImmUnsignedField<hibit, lobit>(rx.GetCode());
6943   }
6944 
6945 #define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
6946 #define REGISTER_ENCODER(N)                                           \
6947   static Instr R##N(CPURegister r##N) {                               \
6948     return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
6949   }
CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)6950   CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
6951 #undef REGISTER_ENCODER
6952 #undef CPU_REGISTER_FIELD_NAMES
6953 
6954   static Instr RmNot31(CPURegister rm) {
6955     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
6956     VIXL_ASSERT(!rm.IsZero());
6957     return Rm(rm);
6958   }
6959 
6960   // These encoding functions allow the stack pointer to be encoded, and
6961   // disallow the zero register.
RdSP(Register rd)6962   static Instr RdSP(Register rd) {
6963     VIXL_ASSERT(!rd.IsZero());
6964     return (rd.GetCode() & kRegCodeMask) << Rd_offset;
6965   }
6966 
RnSP(Register rn)6967   static Instr RnSP(Register rn) {
6968     VIXL_ASSERT(!rn.IsZero());
6969     return (rn.GetCode() & kRegCodeMask) << Rn_offset;
6970   }
6971 
RmSP(Register rm)6972   static Instr RmSP(Register rm) {
6973     VIXL_ASSERT(!rm.IsZero());
6974     return (rm.GetCode() & kRegCodeMask) << Rm_offset;
6975   }
6976 
Pd(PRegister pd)6977   static Instr Pd(PRegister pd) {
6978     return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
6979   }
6980 
Pm(PRegister pm)6981   static Instr Pm(PRegister pm) {
6982     return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
6983   }
6984 
Pn(PRegister pn)6985   static Instr Pn(PRegister pn) {
6986     return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
6987   }
6988 
PgLow8(PRegister pg)6989   static Instr PgLow8(PRegister pg) {
6990     // Governing predicates can be merging, zeroing, or unqualified. They should
6991     // never have a lane size.
6992     VIXL_ASSERT(!pg.HasLaneSize());
6993     return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
6994   }
6995 
6996   template <int hibit, int lobit>
Pg(PRegister pg)6997   static Instr Pg(PRegister pg) {
6998     // Governing predicates can be merging, zeroing, or unqualified. They should
6999     // never have a lane size.
7000     VIXL_ASSERT(!pg.HasLaneSize());
7001     return Rx<hibit, lobit>(pg);
7002   }
7003 
7004   // Flags encoding.
Flags(FlagsUpdate S)7005   static Instr Flags(FlagsUpdate S) {
7006     if (S == SetFlags) {
7007       return 1 << FlagsUpdate_offset;
7008     } else if (S == LeaveFlags) {
7009       return 0 << FlagsUpdate_offset;
7010     }
7011     VIXL_UNREACHABLE();
7012     return 0;
7013   }
7014 
Cond(Condition cond)7015   static Instr Cond(Condition cond) { return cond << Condition_offset; }
7016 
7017   // Generic immediate encoding.
7018   template <int hibit, int lobit>
ImmField(int64_t imm)7019   static Instr ImmField(int64_t imm) {
7020     VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7021     VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7022     int fieldsize = hibit - lobit + 1;
7023     VIXL_ASSERT(IsIntN(fieldsize, imm));
7024     return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
7025   }
7026 
7027   // For unsigned immediate encoding.
7028   // TODO: Handle signed and unsigned immediate in satisfactory way.
7029   template <int hibit, int lobit>
ImmUnsignedField(uint64_t imm)7030   static Instr ImmUnsignedField(uint64_t imm) {
7031     VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
7032     VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
7033     VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
7034     return static_cast<Instr>(imm << lobit);
7035   }
7036 
7037   // PC-relative address encoding.
ImmPCRelAddress(int64_t imm21)7038   static Instr ImmPCRelAddress(int64_t imm21) {
7039     VIXL_ASSERT(IsInt21(imm21));
7040     Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
7041     Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
7042     Instr immlo = imm << ImmPCRelLo_offset;
7043     return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
7044   }
7045 
7046   // Branch encoding.
ImmUncondBranch(int64_t imm26)7047   static Instr ImmUncondBranch(int64_t imm26) {
7048     VIXL_ASSERT(IsInt26(imm26));
7049     return TruncateToUint26(imm26) << ImmUncondBranch_offset;
7050   }
7051 
ImmCondBranch(int64_t imm19)7052   static Instr ImmCondBranch(int64_t imm19) {
7053     VIXL_ASSERT(IsInt19(imm19));
7054     return TruncateToUint19(imm19) << ImmCondBranch_offset;
7055   }
7056 
ImmCmpBranch(int64_t imm19)7057   static Instr ImmCmpBranch(int64_t imm19) {
7058     VIXL_ASSERT(IsInt19(imm19));
7059     return TruncateToUint19(imm19) << ImmCmpBranch_offset;
7060   }
7061 
ImmTestBranch(int64_t imm14)7062   static Instr ImmTestBranch(int64_t imm14) {
7063     VIXL_ASSERT(IsInt14(imm14));
7064     return TruncateToUint14(imm14) << ImmTestBranch_offset;
7065   }
7066 
ImmTestBranchBit(unsigned bit_pos)7067   static Instr ImmTestBranchBit(unsigned bit_pos) {
7068     VIXL_ASSERT(IsUint6(bit_pos));
7069     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
7070     unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
7071     unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
7072     bit5 &= ImmTestBranchBit5_mask;
7073     bit40 &= ImmTestBranchBit40_mask;
7074     return bit5 | bit40;
7075   }
7076 
7077   // Data Processing encoding.
SF(Register rd)7078   static Instr SF(Register rd) {
7079     return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
7080   }
7081 
ImmAddSub(int imm)7082   static Instr ImmAddSub(int imm) {
7083     VIXL_ASSERT(IsImmAddSub(imm));
7084     if (IsUint12(imm)) {  // No shift required.
7085       imm <<= ImmAddSub_offset;
7086     } else {
7087       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7088     }
7089     return imm;
7090   }
7091 
SVEImmSetBits(unsigned imms,unsigned lane_size)7092   static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
7093     VIXL_ASSERT(IsUint6(imms));
7094     VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
7095     USE(lane_size);
7096     return imms << SVEImmSetBits_offset;
7097   }
7098 
SVEImmRotate(unsigned immr,unsigned lane_size)7099   static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
7100     VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
7101     USE(lane_size);
7102     return immr << SVEImmRotate_offset;
7103   }
7104 
SVEBitN(unsigned bitn)7105   static Instr SVEBitN(unsigned bitn) {
7106     VIXL_ASSERT(IsUint1(bitn));
7107     return bitn << SVEBitN_offset;
7108   }
7109 
7110   static Instr SVEDtype(unsigned msize_in_bytes_log2,
7111                         unsigned esize_in_bytes_log2,
7112                         bool is_signed,
7113                         int dtype_h_lsb = 23,
7114                         int dtype_l_lsb = 21) {
7115     VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7116     VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
7117     Instr dtype_h = msize_in_bytes_log2;
7118     Instr dtype_l = esize_in_bytes_log2;
7119     // Signed forms use the encodings where msize would be greater than esize.
7120     if (is_signed) {
7121       dtype_h = dtype_h ^ 0x3;
7122       dtype_l = dtype_l ^ 0x3;
7123     }
7124     VIXL_ASSERT(IsUint2(dtype_h));
7125     VIXL_ASSERT(IsUint2(dtype_l));
7126     VIXL_ASSERT((dtype_h > dtype_l) == is_signed);
7127 
7128     return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
7129   }
7130 
SVEDtypeSplit(unsigned msize_in_bytes_log2,unsigned esize_in_bytes_log2,bool is_signed)7131   static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
7132                              unsigned esize_in_bytes_log2,
7133                              bool is_signed) {
7134     return SVEDtype(msize_in_bytes_log2,
7135                     esize_in_bytes_log2,
7136                     is_signed,
7137                     23,
7138                     13);
7139   }
7140 
ImmS(unsigned imms,unsigned reg_size)7141   static Instr ImmS(unsigned imms, unsigned reg_size) {
7142     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
7143                 ((reg_size == kWRegSize) && IsUint5(imms)));
7144     USE(reg_size);
7145     return imms << ImmS_offset;
7146   }
7147 
ImmR(unsigned immr,unsigned reg_size)7148   static Instr ImmR(unsigned immr, unsigned reg_size) {
7149     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7150                 ((reg_size == kWRegSize) && IsUint5(immr)));
7151     USE(reg_size);
7152     VIXL_ASSERT(IsUint6(immr));
7153     return immr << ImmR_offset;
7154   }
7155 
ImmSetBits(unsigned imms,unsigned reg_size)7156   static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
7157     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7158     VIXL_ASSERT(IsUint6(imms));
7159     VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
7160     USE(reg_size);
7161     return imms << ImmSetBits_offset;
7162   }
7163 
ImmRotate(unsigned immr,unsigned reg_size)7164   static Instr ImmRotate(unsigned immr, unsigned reg_size) {
7165     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7166     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7167                 ((reg_size == kWRegSize) && IsUint5(immr)));
7168     USE(reg_size);
7169     return immr << ImmRotate_offset;
7170   }
7171 
ImmLLiteral(int64_t imm19)7172   static Instr ImmLLiteral(int64_t imm19) {
7173     VIXL_ASSERT(IsInt19(imm19));
7174     return TruncateToUint19(imm19) << ImmLLiteral_offset;
7175   }
7176 
BitN(unsigned bitn,unsigned reg_size)7177   static Instr BitN(unsigned bitn, unsigned reg_size) {
7178     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
7179     VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
7180     USE(reg_size);
7181     return bitn << BitN_offset;
7182   }
7183 
ShiftDP(Shift shift)7184   static Instr ShiftDP(Shift shift) {
7185     VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
7186     return shift << ShiftDP_offset;
7187   }
7188 
ImmDPShift(unsigned amount)7189   static Instr ImmDPShift(unsigned amount) {
7190     VIXL_ASSERT(IsUint6(amount));
7191     return amount << ImmDPShift_offset;
7192   }
7193 
ExtendMode(Extend extend)7194   static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
7195 
ImmExtendShift(unsigned left_shift)7196   static Instr ImmExtendShift(unsigned left_shift) {
7197     VIXL_ASSERT(left_shift <= 4);
7198     return left_shift << ImmExtendShift_offset;
7199   }
7200 
ImmCondCmp(unsigned imm)7201   static Instr ImmCondCmp(unsigned imm) {
7202     VIXL_ASSERT(IsUint5(imm));
7203     return imm << ImmCondCmp_offset;
7204   }
7205 
Nzcv(StatusFlags nzcv)7206   static Instr Nzcv(StatusFlags nzcv) {
7207     return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
7208   }
7209 
7210   // MemOperand offset encoding.
ImmLSUnsigned(int64_t imm12)7211   static Instr ImmLSUnsigned(int64_t imm12) {
7212     VIXL_ASSERT(IsUint12(imm12));
7213     return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
7214   }
7215 
ImmLS(int64_t imm9)7216   static Instr ImmLS(int64_t imm9) {
7217     VIXL_ASSERT(IsInt9(imm9));
7218     return TruncateToUint9(imm9) << ImmLS_offset;
7219   }
7220 
ImmLSPair(int64_t imm7,unsigned access_size_in_bytes_log2)7221   static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
7222     VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
7223     int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
7224     VIXL_ASSERT(IsInt7(scaled_imm7));
7225     return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
7226   }
7227 
ImmShiftLS(unsigned shift_amount)7228   static Instr ImmShiftLS(unsigned shift_amount) {
7229     VIXL_ASSERT(IsUint1(shift_amount));
7230     return shift_amount << ImmShiftLS_offset;
7231   }
7232 
ImmLSPAC(int64_t imm10)7233   static Instr ImmLSPAC(int64_t imm10) {
7234     VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
7235     int64_t scaled_imm10 = imm10 / (1 << 3);
7236     VIXL_ASSERT(IsInt10(scaled_imm10));
7237     uint32_t s_bit = (scaled_imm10 >> 9) & 1;
7238     return (s_bit << ImmLSPACHi_offset) |
7239            (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
7240   }
7241 
ImmPrefetchOperation(int imm5)7242   static Instr ImmPrefetchOperation(int imm5) {
7243     VIXL_ASSERT(IsUint5(imm5));
7244     return imm5 << ImmPrefetchOperation_offset;
7245   }
7246 
ImmException(int imm16)7247   static Instr ImmException(int imm16) {
7248     VIXL_ASSERT(IsUint16(imm16));
7249     return imm16 << ImmException_offset;
7250   }
7251 
ImmUdf(int imm16)7252   static Instr ImmUdf(int imm16) {
7253     VIXL_ASSERT(IsUint16(imm16));
7254     return imm16 << ImmUdf_offset;
7255   }
7256 
ImmSystemRegister(int imm16)7257   static Instr ImmSystemRegister(int imm16) {
7258     VIXL_ASSERT(IsUint16(imm16));
7259     return imm16 << ImmSystemRegister_offset;
7260   }
7261 
ImmRMIFRotation(int imm6)7262   static Instr ImmRMIFRotation(int imm6) {
7263     VIXL_ASSERT(IsUint6(imm6));
7264     return imm6 << ImmRMIFRotation_offset;
7265   }
7266 
ImmHint(int imm7)7267   static Instr ImmHint(int imm7) {
7268     VIXL_ASSERT(IsUint7(imm7));
7269     return imm7 << ImmHint_offset;
7270   }
7271 
CRm(int imm4)7272   static Instr CRm(int imm4) {
7273     VIXL_ASSERT(IsUint4(imm4));
7274     return imm4 << CRm_offset;
7275   }
7276 
CRn(int imm4)7277   static Instr CRn(int imm4) {
7278     VIXL_ASSERT(IsUint4(imm4));
7279     return imm4 << CRn_offset;
7280   }
7281 
SysOp(int imm14)7282   static Instr SysOp(int imm14) {
7283     VIXL_ASSERT(IsUint14(imm14));
7284     return imm14 << SysOp_offset;
7285   }
7286 
ImmSysOp1(int imm3)7287   static Instr ImmSysOp1(int imm3) {
7288     VIXL_ASSERT(IsUint3(imm3));
7289     return imm3 << SysOp1_offset;
7290   }
7291 
ImmSysOp2(int imm3)7292   static Instr ImmSysOp2(int imm3) {
7293     VIXL_ASSERT(IsUint3(imm3));
7294     return imm3 << SysOp2_offset;
7295   }
7296 
ImmBarrierDomain(int imm2)7297   static Instr ImmBarrierDomain(int imm2) {
7298     VIXL_ASSERT(IsUint2(imm2));
7299     return imm2 << ImmBarrierDomain_offset;
7300   }
7301 
ImmBarrierType(int imm2)7302   static Instr ImmBarrierType(int imm2) {
7303     VIXL_ASSERT(IsUint2(imm2));
7304     return imm2 << ImmBarrierType_offset;
7305   }
7306 
7307   // Move immediates encoding.
ImmMoveWide(uint64_t imm)7308   static Instr ImmMoveWide(uint64_t imm) {
7309     VIXL_ASSERT(IsUint16(imm));
7310     return static_cast<Instr>(imm << ImmMoveWide_offset);
7311   }
7312 
ShiftMoveWide(int64_t shift)7313   static Instr ShiftMoveWide(int64_t shift) {
7314     VIXL_ASSERT(IsUint2(shift));
7315     return static_cast<Instr>(shift << ShiftMoveWide_offset);
7316   }
7317 
  // FP Immediates.
  // Declarations only; the definitions live outside this header section. One
  // overload exists per floating-point width (Float16, float, double).
  // NOTE(review): presumably each expects a value accepted by the matching
  // IsImmFP16/IsImmFP32/IsImmFP64 check - confirm against the definitions.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7322 
7323   // FP register type.
FPType(VRegister fd)7324   static Instr FPType(VRegister fd) {
7325     VIXL_ASSERT(fd.IsScalar());
7326     switch (fd.GetSizeInBits()) {
7327       case 16:
7328         return FP16;
7329       case 32:
7330         return FP32;
7331       case 64:
7332         return FP64;
7333       default:
7334         VIXL_UNREACHABLE();
7335         return 0;
7336     }
7337   }
7338 
FPScale(unsigned scale)7339   static Instr FPScale(unsigned scale) {
7340     VIXL_ASSERT(IsUint6(scale));
7341     return scale << FPScale_offset;
7342   }
7343 
  // Immediate field checking helpers.
  // Declarations only; the definitions live outside this header section. Each
  // returns a bool saying whether the given value is acceptable for the named
  // immediate form.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);
  // The three pointer parameters default to NULL.
  // NOTE(review): presumably they are optional outputs receiving the N, imm_s
  // and imm_r encoding fields on success - confirm against the definition.
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7360 
7361   // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)7362   static Instr VFormat(VRegister vd) {
7363     if (vd.Is64Bits()) {
7364       switch (vd.GetLanes()) {
7365         case 2:
7366           return NEON_2S;
7367         case 4:
7368           return NEON_4H;
7369         case 8:
7370           return NEON_8B;
7371         default:
7372           return 0xffffffff;
7373       }
7374     } else {
7375       VIXL_ASSERT(vd.Is128Bits());
7376       switch (vd.GetLanes()) {
7377         case 2:
7378           return NEON_2D;
7379         case 4:
7380           return NEON_4S;
7381         case 8:
7382           return NEON_8H;
7383         case 16:
7384           return NEON_16B;
7385         default:
7386           return 0xffffffff;
7387       }
7388     }
7389   }
7390 
7391   // Instruction bits for vector format in floating point data processing
7392   // operations.
FPFormat(VRegister vd)7393   static Instr FPFormat(VRegister vd) {
7394     switch (vd.GetLanes()) {
7395       case 1:
7396         // Floating point scalar formats.
7397         switch (vd.GetSizeInBits()) {
7398           case 16:
7399             return FP16;
7400           case 32:
7401             return FP32;
7402           case 64:
7403             return FP64;
7404           default:
7405             VIXL_UNREACHABLE();
7406         }
7407         break;
7408       case 2:
7409         // Two lane floating point vector formats.
7410         switch (vd.GetSizeInBits()) {
7411           case 64:
7412             return NEON_FP_2S;
7413           case 128:
7414             return NEON_FP_2D;
7415           default:
7416             VIXL_UNREACHABLE();
7417         }
7418         break;
7419       case 4:
7420         // Four lane floating point vector formats.
7421         switch (vd.GetSizeInBits()) {
7422           case 64:
7423             return NEON_FP_4H;
7424           case 128:
7425             return NEON_FP_4S;
7426           default:
7427             VIXL_UNREACHABLE();
7428         }
7429         break;
7430       case 8:
7431         // Eight lane floating point vector format.
7432         VIXL_ASSERT(vd.Is128Bits());
7433         return NEON_FP_8H;
7434       default:
7435         VIXL_UNREACHABLE();
7436         return 0;
7437     }
7438     VIXL_UNREACHABLE();
7439     return 0;
7440   }
7441 
7442   // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)7443   static Instr LSVFormat(VRegister vd) {
7444     if (vd.Is64Bits()) {
7445       switch (vd.GetLanes()) {
7446         case 1:
7447           return LS_NEON_1D;
7448         case 2:
7449           return LS_NEON_2S;
7450         case 4:
7451           return LS_NEON_4H;
7452         case 8:
7453           return LS_NEON_8B;
7454         default:
7455           return 0xffffffff;
7456       }
7457     } else {
7458       VIXL_ASSERT(vd.Is128Bits());
7459       switch (vd.GetLanes()) {
7460         case 2:
7461           return LS_NEON_2D;
7462         case 4:
7463           return LS_NEON_4S;
7464         case 8:
7465           return LS_NEON_8H;
7466         case 16:
7467           return LS_NEON_16B;
7468         default:
7469           return 0xffffffff;
7470       }
7471     }
7472   }
7473 
7474   // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)7475   static Instr SFormat(VRegister vd) {
7476     VIXL_ASSERT(vd.GetLanes() == 1);
7477     switch (vd.GetSizeInBytes()) {
7478       case 1:
7479         return NEON_B;
7480       case 2:
7481         return NEON_H;
7482       case 4:
7483         return NEON_S;
7484       case 8:
7485         return NEON_D;
7486       default:
7487         return 0xffffffff;
7488     }
7489   }
7490 
7491   template <typename T>
SVESize(const T & rd)7492   static Instr SVESize(const T& rd) {
7493     VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
7494     VIXL_ASSERT(rd.HasLaneSize());
7495     switch (rd.GetLaneSizeInBytes()) {
7496       case 1:
7497         return SVE_B;
7498       case 2:
7499         return SVE_H;
7500       case 4:
7501         return SVE_S;
7502       case 8:
7503         return SVE_D;
7504       default:
7505         return 0xffffffff;
7506     }
7507   }
7508 
ImmSVEPredicateConstraint(int pattern)7509   static Instr ImmSVEPredicateConstraint(int pattern) {
7510     VIXL_ASSERT(IsUint5(pattern));
7511     return (pattern << ImmSVEPredicateConstraint_offset) &
7512            ImmSVEPredicateConstraint_mask;
7513   }
7514 
ImmNEONHLM(int index,int num_bits)7515   static Instr ImmNEONHLM(int index, int num_bits) {
7516     int h, l, m;
7517     if (num_bits == 3) {
7518       VIXL_ASSERT(IsUint3(index));
7519       h = (index >> 2) & 1;
7520       l = (index >> 1) & 1;
7521       m = (index >> 0) & 1;
7522     } else if (num_bits == 2) {
7523       VIXL_ASSERT(IsUint2(index));
7524       h = (index >> 1) & 1;
7525       l = (index >> 0) & 1;
7526       m = 0;
7527     } else {
7528       VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7529       h = (index >> 0) & 1;
7530       l = 0;
7531       m = 0;
7532     }
7533     return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7534   }
7535 
ImmRotFcadd(int rot)7536   static Instr ImmRotFcadd(int rot) {
7537     VIXL_ASSERT(rot == 90 || rot == 270);
7538     return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7539   }
7540 
ImmRotFcmlaSca(int rot)7541   static Instr ImmRotFcmlaSca(int rot) {
7542     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7543     return (rot / 90) << ImmRotFcmlaSca_offset;
7544   }
7545 
ImmRotFcmlaVec(int rot)7546   static Instr ImmRotFcmlaVec(int rot) {
7547     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7548     return (rot / 90) << ImmRotFcmlaVec_offset;
7549   }
7550 
ImmNEONExt(int imm4)7551   static Instr ImmNEONExt(int imm4) {
7552     VIXL_ASSERT(IsUint4(imm4));
7553     return imm4 << ImmNEONExt_offset;
7554   }
7555 
ImmNEON5(Instr format,int index)7556   static Instr ImmNEON5(Instr format, int index) {
7557     VIXL_ASSERT(IsUint4(index));
7558     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7559     int imm5 = (index << (s + 1)) | (1 << s);
7560     return imm5 << ImmNEON5_offset;
7561   }
7562 
ImmNEON4(Instr format,int index)7563   static Instr ImmNEON4(Instr format, int index) {
7564     VIXL_ASSERT(IsUint4(index));
7565     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7566     int imm4 = index << s;
7567     return imm4 << ImmNEON4_offset;
7568   }
7569 
ImmNEONabcdefgh(int imm8)7570   static Instr ImmNEONabcdefgh(int imm8) {
7571     VIXL_ASSERT(IsUint8(imm8));
7572     Instr instr;
7573     instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
7574     instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
7575     return instr;
7576   }
7577 
NEONCmode(int cmode)7578   static Instr NEONCmode(int cmode) {
7579     VIXL_ASSERT(IsUint4(cmode));
7580     return cmode << NEONCmode_offset;
7581   }
7582 
NEONModImmOp(int op)7583   static Instr NEONModImmOp(int op) {
7584     VIXL_ASSERT(IsUint1(op));
7585     return op << NEONModImmOp_offset;
7586   }
7587 
7588   // Size of the code generated since label to the current position.
GetSizeOfCodeGeneratedSince(Label * label)7589   size_t GetSizeOfCodeGeneratedSince(Label* label) const {
7590     VIXL_ASSERT(label->IsBound());
7591     return GetBuffer().GetOffsetFrom(label->GetLocation());
7592   }
7593   VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
7594                   size_t SizeOfCodeGeneratedSince(Label* label) const) {
7595     return GetSizeOfCodeGeneratedSince(label);
7596   }
7597 
7598   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
7599                   size_t GetBufferCapacity() const) {
7600     return GetBuffer().GetCapacity();
7601   }
7602   VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
7603     return GetBuffer().GetCapacity();
7604   }
7605 
7606   VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
7607                   size_t GetRemainingBufferSpace() const) {
7608     return GetBuffer().GetRemainingBytes();
7609   }
7610   VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
7611                   size_t RemainingBufferSpace() const) {
7612     return GetBuffer().GetRemainingBytes();
7613   }
7614 
GetPic()7615   PositionIndependentCodeOption GetPic() const { return pic_; }
7616   VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
7617     return GetPic();
7618   }
7619 
GetCPUFeatures()7620   CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
7621 
SetCPUFeatures(const CPUFeatures & cpu_features)7622   void SetCPUFeatures(const CPUFeatures& cpu_features) {
7623     cpu_features_ = cpu_features;
7624   }
7625 
AllowPageOffsetDependentCode()7626   bool AllowPageOffsetDependentCode() const {
7627     return (GetPic() == PageOffsetDependentCode) ||
7628            (GetPic() == PositionDependentCode);
7629   }
7630 
AppropriateZeroRegFor(const CPURegister & reg)7631   static Register AppropriateZeroRegFor(const CPURegister& reg) {
7632     return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7633   }
7634 
7635  protected:
  // Load/store helpers. These are declarations only; the definitions are not
  // part of this header section.
  // NOTE(review): presumably these are the shared back ends used by the public
  // load/store instruction methods - confirm against the definitions.

  // Single-register form. `option` defaults to PreferScaledOffset.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Form taking a LoadStorePACOp and a single X register.
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Register-pair form.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON structure forms: multi-structure, the `reg_count` variant, a single
  // lane, and all lanes.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  // NOTE(review): by its name this validates the operands of the structure
  // forms above - confirm against the definition.
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);
7665 
  // SVE load/store helpers. These are declarations only; the definitions are
  // not part of this header section.

  // `is_load` defaults to false because it is only used in the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ...
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above. The caller supplies the base
  // instruction bits in `op`.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above. The three flags select load vs. store,
  // signed vs. unsigned, and the first-fault form.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  // Separate instruction bits are supplied for the register-offset and the
  // immediate-offset forms. `imm_divisor` defaults to 1.
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  // NOTE(review): by their names these cover the vector-plus-scalar
  // addressing forms of ld1/st1 - confirm against the definitions.
  void SVELd1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize,
                          bool is_signed);
  void SVESt1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize);
7756 
  // Prefetch helpers (declarations only). Two overloads are provided: one
  // takes a named PrefetchOperation and the other a raw integer `op`. In both,
  // `option` defaults to PreferScaledOffset.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
7763 
  // Data-processing helpers (declarations only; the definitions are not part
  // of this header section).

  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  // SVE variant taking a destination Z register, a 64-bit immediate and the
  // instruction bits in `op`.
  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  // Takes the logical immediate already split into its `n`, `imm_s` and
  // `imm_r` components.
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  // `S` selects whether the flags are updated (see FlagsUpdate).
  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);
7791 
  // SVE vector compare helpers (declarations only). The three overloads differ
  // in their second operand and matching op type: a Z register `zm`, a signed
  // immediate, or an unsigned immediate.
  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);
7809 
  // Miscellaneous SVE helpers (declarations only; the definitions are not part
  // of this header section).

  // Unpredicated add/subtract immediate form, taking an 8-bit immediate and a
  // shift.
  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // These two return the encoded immediate bits for a shift amount at a given
  // lane size in bits.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  // NOTE(review): `encoded_imm` is presumably the value produced by one of the
  // EncodeSVEShift*Immediate functions above - confirm against the callers.
  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  // Predicated variant, governed by the merging predicate `pg`.
  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Takes one candidate `op` per lane size (H, S, D).
  // NOTE(review): presumably the one matching `lane_size_in_bytes_log2` is
  // selected and combined with the encoded `zm` and `index` - confirm.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);
7845 
  // Helpers for SVE prefetch instructions. Each takes the prefetch operation,
  // a predicate, the memory operand and a `prefetch_size`.
  // NOTE(review): judging by the names, SVEPrefetchHelper is the general entry
  // point and the others each handle one addressing mode; the unit of
  // `prefetch_size` is not visible here - confirm against the definitions.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
7870 
SVEImmPrefetchOperation(PrefetchOperation prfop)7871   static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
7872     // SVE only supports PLD and PST, not PLI.
7873     VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
7874                 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
7875     // Check that we can simply map bits.
7876     VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
7877     VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
7878     // Remaining operations map directly.
7879     return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
7880   }
7881 
7882   // Functions for emulating operands not directly supported by the instruction
7883   // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Common helper for add/subtract instructions taking a generic Operand.
  // `S` is a FlagsUpdate value and `op` names the specific AddSubOp.
  // NOTE(review): presumably dispatches on the kind of `operand` (immediate,
  // shifted or extended register) - confirm against the definition.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Common helper for the NEON table instructions identified by `op`
  // (a NEONTableOp).
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);
7903 
  // Find an appropriate LoadStoreOp, LoadStorePairOp,
  // LoadStorePairNonTemporalOp or LoadLiteralOp for the specified registers.
  // Only simple loads are supported; sign- and zero-extension (such as in
  // LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
7918 
7919   // Convenience pass-through for CPU feature checks.
7920   bool CPUHas(CPUFeatures::Feature feature0,
7921               CPUFeatures::Feature feature1 = CPUFeatures::kNone,
7922               CPUFeatures::Feature feature2 = CPUFeatures::kNone,
7923               CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
7924     return cpu_features_.Has(feature0, feature1, feature2, feature3);
7925   }
7926 
7927   // Determine whether the target CPU has the specified registers, based on the
7928   // currently-enabled CPU features. Presence of a register does not imply
7929   // support for arbitrary operations on it. For example, CPUs with FP have H
7930   // registers, but most half-precision operations require the FPHalf feature.
7931   //
7932   // These are used to check CPU features in loads and stores that have the same
7933   // entry point for both integer and FP registers.
7934   bool CPUHas(const CPURegister& rt) const;
7935   bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
7936 
  // Overload of the check above for a system register.
  // NOTE(review): presumably reports whether the currently-enabled CPU
  // features provide `sysreg` - confirm against the definition.
  bool CPUHas(SystemRegister sysreg) const;
7938 
7939  private:
  // Convert a half-, single- or double-precision value to an 8-bit immediate,
  // returned in a uint32_t.
  // NOTE(review): presumably the value must be exactly representable in the
  // 8-bit FP immediate encoding; that precondition is not visible here -
  // confirm against the definitions.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);
7943 
7944   // Instruction helpers.
  // Shared helpers for scalar integer and FP instruction classes. Each takes
  // the operands plus an `op` (or `mov_op`) value naming the specific
  // instruction within the class.
  // NOTE(review): where present, `S` (a FlagsUpdate) presumably selects the
  // flag-setting variant - confirm against the definitions.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // Shared helpers for NEON instruction classes: across-lanes, modified
  // immediate, two-/three-register "same" and three-register "different"
  // forms. Each takes the vector operands plus a value naming the instruction.
  // NOTE(review): the L, W and HN suffixes presumably denote the long, wide
  // and high-narrow variants, and `op_half` the half-precision encoding -
  // confirm against the definitions.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  // Shared helpers for the NEON two-register miscellaneous, permute and
  // by-element instruction classes. NEONFP2RegMisc and NEONFP2RegMiscFP16 are
  // each overloaded: one form takes a typed op with a defaulted `value`, the
  // other a raw Instr opcode.
  // NOTE(review): the defaulted `value` arguments are presumably an immediate
  // operand for compare-with-zero style forms, and `vm_index` the lane of `vm`
  // to use - confirm against the definitions.
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // Shared helpers for NEON shift-by-immediate instructions, plus NEONXtn.
  // NEONShiftImmediate takes an `immh_immb` field value, whereas the other
  // shift helpers take a plain `shift` amount.
  // NOTE(review): the Left/Right/L/N helpers presumably validate and encode
  // `shift` before delegating to NEONShiftImmediate - confirm against the
  // definitions.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
8081 
8082   // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
8083   // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
8084   void ResolveSVEImm8Shift(int* imm8, int* shift);
8085 
  // Return instruction bits derived from `addr`.
  // NOTE(review): presumably the addressing-mode field of a structure
  // load/store, as the name suggests - confirm against the definition.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);
8087 
8088   // Encode the specified MemOperand for the specified access size and scaling
8089   // preference.
8090   Instr LoadStoreMemOperand(const MemOperand& addr,
8091                             unsigned access_size_in_bytes_log2,
8092                             LoadStoreScalingOption option);
8093 
8094   // Link the current (not-yet-emitted) instruction to the specified label, then
8095   // return an offset to be encoded in the instruction. If the label is not yet
8096   // bound, an offset of 0 is returned.
8097   ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
8098   ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
8099   ptrdiff_t LinkAndGetPageOffsetTo(Label* label);
8100 
8101   // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
8102   template <int element_shift>
8103   ptrdiff_t LinkAndGetOffsetTo(Label* label);
8104 
  // Literal load offsets are in words (32-bit).
8106   ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8107 
8108   // Emit the instruction in buffer_.
Emit(Instr instruction)8109   void Emit(Instr instruction) {
8110     VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
8111     VIXL_ASSERT(AllowAssembler());
8112     GetBuffer()->Emit32(instruction);
8113   }
8114 
  // Position-independent-code option (see PositionIndependentCodeOption).
  PositionIndependentCodeOption pic_;

  // Feature set queried by the CPUHas() feature overload.
  CPUFeatures cpu_features_;
8118 };
8119 
8120 
8121 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8122 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8123   return UpdateValue(new_value,
8124                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8125 }
8126 
8127 
8128 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8129 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8130   return UpdateValue(high64,
8131                      low64,
8132                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8133 }
8134 
8135 
8136 }  // namespace aarch64
8137 
8138 // Required InvalSet template specialisations.
8139 // TODO: These template specialisations should not live in this file.  Move
8140 // Label out of the aarch64 namespace in order to share its implementation
8141 // later.
8142 #define INVAL_SET_TEMPLATE_PARAMETERS                                \
8143   ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
8144       aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
8145       aarch64::Label::kReclaimFactor
8146 template <>
GetKey(const ptrdiff_t & element)8147 inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
8148     const ptrdiff_t& element) {
8149   return element;
8150 }
8151 template <>
SetKey(ptrdiff_t * element,ptrdiff_t key)8152 inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
8153                                                             ptrdiff_t key) {
8154   *element = key;
8155 }
8156 #undef INVAL_SET_TEMPLATE_PARAMETERS
8157 
8158 }  // namespace vixl
8159 
8160 #endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8161