• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29 
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 
37 #include "operands-aarch64.h"
38 
39 namespace vixl {
40 namespace aarch64 {
41 
42 class LabelTestHelper;  // Forward declaration.
43 
44 
45 class Label {
46  public:
47 #ifndef PANDA_BUILD
Label()48   Label() : location_(kLocationUnbound) {}
49 #else
50   Label() = delete;
51   Label(AllocatorWrapper allocator) : links_(allocator), location_(kLocationUnbound) {}
52 #endif
~Label()53   virtual ~Label() {
54     // All links to a label must have been resolved before it is destructed.
55 #ifndef PANDA_BUILD
56     VIXL_ASSERT(!IsLinked());
57 #else
58     // Codegen may create unlinked labels
59 #endif
60   }
61 
IsBound()62   bool IsBound() const { return location_ >= 0; }
IsLinked()63   bool IsLinked() const { return !links_.empty(); }
64 
GetLocation()65   ptrdiff_t GetLocation() const { return location_; }
66   VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
67     return GetLocation();
68   }
69 
70   static const int kNPreallocatedLinks = 4;
71   static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
72   static const size_t kReclaimFrom = 512;
73   static const size_t kReclaimFactor = 2;
74 
75   typedef InvalSet<ptrdiff_t,
76                    kNPreallocatedLinks,
77                    ptrdiff_t,
78                    kInvalidLinkKey,
79                    kReclaimFrom,
80                    kReclaimFactor>
81       LinksSetBase;
82   typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;
83 
84  private:
85   class LinksSet : public LinksSetBase {
86    public:
87 #ifndef PANDA_BUILD
LinksSet()88     LinksSet() : LinksSetBase() {}
89 #else
90     LinksSet() = delete;
91     LinksSet(AllocatorWrapper allocator) : LinksSetBase(allocator) {}
92 #endif
93   };
94 
95   // Allows iterating over the links of a label. The behaviour is undefined if
96   // the list of links is modified in any way while iterating.
97   class LabelLinksIterator : public LabelLinksIteratorBase {
98    public:
LabelLinksIterator(Label * label)99     explicit LabelLinksIterator(Label* label)
100         : LabelLinksIteratorBase(&label->links_) {}
101 
102     // TODO: Remove these and use the STL-like interface instead.
103     using LabelLinksIteratorBase::Advance;
104     using LabelLinksIteratorBase::Current;
105   };
106 
Bind(ptrdiff_t location)107   void Bind(ptrdiff_t location) {
108     // Labels can only be bound once.
109 #ifndef PANDA_BUILD
110     VIXL_ASSERT(!IsBound());
111 #else
112     // Disabled for unit-tests (it bind non-bound locs)
113 #endif
114     location_ = location;
115   }
116 
AddLink(ptrdiff_t instruction)117   void AddLink(ptrdiff_t instruction) {
118     // If a label is bound, the assembler already has the information it needs
119     // to write the instruction, so there is no need to add it to links_.
120     VIXL_ASSERT(!IsBound());
121     links_.insert(instruction);
122   }
123 
DeleteLink(ptrdiff_t instruction)124   void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }
125 
ClearAllLinks()126   void ClearAllLinks() { links_.clear(); }
127 
128   // TODO: The comment below considers average case complexity for our
129   // usual use-cases. The elements of interest are:
130   // - Branches to a label are emitted in order: branch instructions to a label
131   // are generated at an offset in the code generation buffer greater than any
132   // other branch to that same label already generated. As an example, this can
133   // be broken when an instruction is patched to become a branch. Note that the
134   // code will still work, but the complexity considerations below may locally
135   // not apply any more.
136   // - Veneers are generated in order: for multiple branches of the same type
137   // branching to the same unbound label going out of range, veneers are
138   // generated in growing order of the branch instruction offset from the start
139   // of the buffer.
140   //
141   // When creating a veneer for a branch going out of range, the link for this
142   // branch needs to be removed from this `links_`. Since all branches are
143   // tracked in one underlying InvalSet, the complexity for this deletion is the
144   // same as for finding the element, ie. O(n), where n is the number of links
145   // in the set.
146   // This could be reduced to O(1) by using the same trick as used when tracking
147   // branch information for veneers: split the container to use one set per type
148   // of branch. With that setup, when a veneer is created and the link needs to
149   // be deleted, if the two points above hold, it must be the minimum element of
150   // the set for its type of branch, and that minimum element will be accessible
151   // in O(1).
152 
153   // The offsets of the instructions that have linked to this label.
154   LinksSet links_;
155   // The label location.
156   ptrdiff_t location_;
157 
158   static const ptrdiff_t kLocationUnbound = -1;
159 
160 // It is not safe to copy labels, so disable the copy constructor and operator
161 // by declaring them private (without an implementation).
162 #if __cplusplus >= 201103L
163   Label(const Label&) = delete;
164   void operator=(const Label&) = delete;
165 #else
166   Label(const Label&);
167   void operator=(const Label&);
168 #endif
169 
170   // The Assembler class is responsible for binding and linking labels, since
171   // the stored offsets need to be consistent with the Assembler's buffer.
172   friend class Assembler;
173   // The MacroAssembler and VeneerPool handle resolution of branches to distant
174   // targets.
175   friend class MacroAssembler;
176   friend class VeneerPool;
177 };
178 
179 class Assembler;
180 class LiteralPool;
181 
182 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
183 // stream and loaded through a pc relative load. The same literal can be
184 // referred to by multiple instructions but a literal can only reside at one
185 // place in memory. A literal can be used by a load before or after being
186 // placed in memory.
187 //
188 // Internally an offset of 0 is associated with a literal which has been
189 // neither used nor placed. Then two possibilities arise:
190 //  1) the label is placed, the offset (stored as offset + 1) is used to
191 //     resolve any subsequent load using the label.
192 //  2) the label is not placed and offset is the offset of the last load using
193 //     the literal (stored as -offset -1). If multiple loads refer to this
194 //     literal then the last load holds the offset of the preceding load and
195 //     all loads form a chain. Once the offset is placed all the loads in the
196 //     chain are resolved and future loads fall back to possibility 1.
197 class RawLiteral {
198  public:
199   enum DeletionPolicy {
200     kDeletedOnPlacementByPool,
201     kDeletedOnPoolDestruction,
202     kManuallyDeleted
203   };
204 
205   RawLiteral(size_t size,
206              LiteralPool* literal_pool,
207              DeletionPolicy deletion_policy = kManuallyDeleted);
208 
209   // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
210   // actually pointing to `Literal<T>` objects.
~RawLiteral()211   virtual ~RawLiteral() {}
212 
GetSize()213   size_t GetSize() const {
214     VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
215     VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
216     VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
217                 (size_ == kQRegSizeInBytes));
218     return size_;
219   }
220   VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }
221 
GetRawValue128Low64()222   uint64_t GetRawValue128Low64() const {
223     VIXL_ASSERT(size_ == kQRegSizeInBytes);
224     return low64_;
225   }
226   VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
227     return GetRawValue128Low64();
228   }
229 
GetRawValue128High64()230   uint64_t GetRawValue128High64() const {
231     VIXL_ASSERT(size_ == kQRegSizeInBytes);
232     return high64_;
233   }
234   VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
235     return GetRawValue128High64();
236   }
237 
GetRawValue64()238   uint64_t GetRawValue64() const {
239     VIXL_ASSERT(size_ == kXRegSizeInBytes);
240     VIXL_ASSERT(high64_ == 0);
241     return low64_;
242   }
243   VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
244     return GetRawValue64();
245   }
246 
GetRawValue32()247   uint32_t GetRawValue32() const {
248     VIXL_ASSERT(size_ == kWRegSizeInBytes);
249     VIXL_ASSERT(high64_ == 0);
250     VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
251     return static_cast<uint32_t>(low64_);
252   }
253   VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
254     return GetRawValue32();
255   }
256 
IsUsed()257   bool IsUsed() const { return offset_ < 0; }
IsPlaced()258   bool IsPlaced() const { return offset_ > 0; }
259 
GetLiteralPool()260   LiteralPool* GetLiteralPool() const { return literal_pool_; }
261 
GetOffset()262   ptrdiff_t GetOffset() const {
263     VIXL_ASSERT(IsPlaced());
264     return offset_ - 1;
265   }
266   VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }
267 
268  protected:
SetOffset(ptrdiff_t offset)269   void SetOffset(ptrdiff_t offset) {
270     VIXL_ASSERT(offset >= 0);
271     VIXL_ASSERT(IsWordAligned(offset));
272     VIXL_ASSERT(!IsPlaced());
273     offset_ = offset + 1;
274   }
set_offset(ptrdiff_t offset)275   VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
276     SetOffset(offset);
277   }
278 
GetLastUse()279   ptrdiff_t GetLastUse() const {
280     VIXL_ASSERT(IsUsed());
281     return -offset_ - 1;
282   }
283   VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }
284 
SetLastUse(ptrdiff_t offset)285   void SetLastUse(ptrdiff_t offset) {
286     VIXL_ASSERT(offset >= 0);
287     VIXL_ASSERT(IsWordAligned(offset));
288     VIXL_ASSERT(!IsPlaced());
289     offset_ = -offset - 1;
290   }
set_last_use(ptrdiff_t offset)291   VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
292     SetLastUse(offset);
293   }
294 
295   size_t size_;
296   ptrdiff_t offset_;
297   uint64_t low64_;
298   uint64_t high64_;
299 
300  private:
301   LiteralPool* literal_pool_;
302   DeletionPolicy deletion_policy_;
303 
304   friend class Assembler;
305   friend class LiteralPool;
306 };
307 
308 
309 template <typename T>
310 class Literal : public RawLiteral {
311  public:
312   explicit Literal(T value,
313                    LiteralPool* literal_pool = NULL,
314                    RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)315       : RawLiteral(sizeof(value), literal_pool, ownership) {
316     VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
317     UpdateValue(value);
318   }
319 
320   Literal(T high64,
321           T low64,
322           LiteralPool* literal_pool = NULL,
323           RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)324       : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
325     VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
326     UpdateValue(high64, low64);
327   }
328 
~Literal()329   virtual ~Literal() {}
330 
331   // Update the value of this literal, if necessary by rewriting the value in
332   // the pool.
333   // If the literal has already been placed in a literal pool, the address of
334   // the start of the code buffer must be provided, as the literal only knows it
335   // offset from there. This also allows patching the value after the code has
336   // been moved in memory.
337   void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
338     VIXL_ASSERT(sizeof(new_value) == size_);
339     memcpy(&low64_, &new_value, sizeof(new_value));
340     if (IsPlaced()) {
341       VIXL_ASSERT(code_buffer != NULL);
342       RewriteValueInCode(code_buffer);
343     }
344   }
345 
346   void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
347     VIXL_ASSERT(sizeof(low64) == size_ / 2);
348     memcpy(&low64_, &low64, sizeof(low64));
349     memcpy(&high64_, &high64, sizeof(high64));
350     if (IsPlaced()) {
351       VIXL_ASSERT(code_buffer != NULL);
352       RewriteValueInCode(code_buffer);
353     }
354   }
355 
356   void UpdateValue(T new_value, const Assembler* assembler);
357   void UpdateValue(T high64, T low64, const Assembler* assembler);
358 
359  private:
RewriteValueInCode(uint8_t * code_buffer)360   void RewriteValueInCode(uint8_t* code_buffer) {
361     VIXL_ASSERT(IsPlaced());
362     VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
363     switch (GetSize()) {
364       case kSRegSizeInBytes:
365         *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
366             GetRawValue32();
367         break;
368       case kDRegSizeInBytes:
369         *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
370             GetRawValue64();
371         break;
372       default:
373         VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
374         uint64_t* base_address =
375             reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
376         *base_address = GetRawValue128Low64();
377         *(base_address + 1) = GetRawValue128High64();
378     }
379   }
380 };
381 
382 
383 // Control whether or not position-independent code should be emitted.
384 enum PositionIndependentCodeOption {
385   // All code generated will be position-independent; all branches and
386   // references to labels generated with the Label class will use PC-relative
387   // addressing.
388   PositionIndependentCode,
389 
390   // Allow VIXL to generate code that refers to absolute addresses. With this
391   // option, it will not be possible to copy the code buffer and run it from a
392   // different address; code must be generated in its final location.
393   PositionDependentCode,
394 
395   // Allow VIXL to assume that the bottom 12 bits of the address will be
396   // constant, but that the top 48 bits may change. This allows `adrp` to
397   // function in systems which copy code between pages, but otherwise maintain
398   // 4KB page alignment.
399   PageOffsetDependentCode
400 };
401 
402 
// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
419 
420 
421 // Assembler.
422 class Assembler : public vixl::internal::AssemblerBase {
423  public:
424   explicit Assembler(
425       PositionIndependentCodeOption pic = PositionIndependentCode)
pic_(pic)426       : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
427 
428 #ifdef PANDA_BUILD
429   explicit Assembler(
430       size_t capacity,
431       PositionIndependentCodeOption pic = PositionIndependentCode) = delete;
432 #else
433   explicit Assembler(
434       size_t capacity,
435       PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(capacity)436       : AssemblerBase(capacity),
437         pic_(pic),
438         cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
439 #endif
440   Assembler(byte* buffer,
441             size_t capacity,
442             PositionIndependentCodeOption pic = PositionIndependentCode)
AssemblerBase(buffer,capacity)443       : AssemblerBase(buffer, capacity),
444         pic_(pic),
445         cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
446 
447   // Upon destruction, the code will assert that one of the following is true:
448   //  * The Assembler object has not been used.
449   //  * Nothing has been emitted since the last Reset() call.
450   //  * Nothing has been emitted since the last FinalizeCode() call.
~Assembler()451   ~Assembler() {}
452 
453   // System functions.
454 
455   // Start generating code from the beginning of the buffer, discarding any code
456   // and data that has already been emitted into the buffer.
457   void Reset();
458 
459   // Bind a label to the current PC.
460   void bind(Label* label);
461 
462   // Bind a label to a specified offset from the start of the buffer.
463   void BindToOffset(Label* label, ptrdiff_t offset);
464 
465   // Place a literal at the current PC.
466   void place(RawLiteral* literal);
467 
468   VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
469     return GetCursorOffset();
470   }
471 
472   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
473                   ptrdiff_t GetBufferEndOffset() const) {
474     return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
475   }
476   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
477                   ptrdiff_t BufferEndOffset() const) {
478     return GetBuffer().GetCapacity();
479   }
480 
481   // Return the address of a bound label.
482   template <typename T>
GetLabelAddress(const Label * label)483   T GetLabelAddress(const Label* label) const {
484     VIXL_ASSERT(label->IsBound());
485     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
486     return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
487   }
488 
GetInstructionAt(ptrdiff_t instruction_offset)489   Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
490     return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
491   }
492   VIXL_DEPRECATED("GetInstructionAt",
493                   Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
494     return GetInstructionAt(instruction_offset);
495   }
496 
GetInstructionOffset(Instruction * instruction)497   ptrdiff_t GetInstructionOffset(Instruction* instruction) {
498     VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
499     ptrdiff_t offset =
500         instruction - GetBuffer()->GetStartAddress<Instruction*>();
501     VIXL_ASSERT((0 <= offset) &&
502                 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
503     return offset;
504   }
505   VIXL_DEPRECATED("GetInstructionOffset",
506                   ptrdiff_t InstructionOffset(Instruction* instruction)) {
507     return GetInstructionOffset(instruction);
508   }
509 
510   // Instruction set functions.
511 
512   // Branch / Jump instructions.
513 
514   // Branch to register.
515   void br(const Register& xn);
516 
517   // Branch with link to register.
518   void blr(const Register& xn);
519 
520   // Branch to register with return hint.
521   void ret(const Register& xn = lr);
522 
523   // Branch to register, with pointer authentication. Using key A and a modifier
524   // of zero [Armv8.3].
525   void braaz(const Register& xn);
526 
527   // Branch to register, with pointer authentication. Using key B and a modifier
528   // of zero [Armv8.3].
529   void brabz(const Register& xn);
530 
531   // Branch with link to register, with pointer authentication. Using key A and
532   // a modifier of zero [Armv8.3].
533   void blraaz(const Register& xn);
534 
535   // Branch with link to register, with pointer authentication. Using key B and
536   // a modifier of zero [Armv8.3].
537   void blrabz(const Register& xn);
538 
539   // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
540   void retaa();
541 
542   // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
543   void retab();
544 
545   // Branch to register, with pointer authentication. Using key A [Armv8.3].
546   void braa(const Register& xn, const Register& xm);
547 
548   // Branch to register, with pointer authentication. Using key B [Armv8.3].
549   void brab(const Register& xn, const Register& xm);
550 
551   // Branch with link to register, with pointer authentication. Using key A
552   // [Armv8.3].
553   void blraa(const Register& xn, const Register& xm);
554 
555   // Branch with link to register, with pointer authentication. Using key B
556   // [Armv8.3].
557   void blrab(const Register& xn, const Register& xm);
558 
559   // Unconditional branch to label.
560   void b(Label* label);
561 
562   // Conditional branch to label.
563   void b(Label* label, Condition cond);
564 
565   // Unconditional branch to PC offset.
566   void b(int64_t imm26);
567 
568   // Conditional branch to PC offset.
569   void b(int64_t imm19, Condition cond);
570 
571   // Branch with link to label.
572   void bl(Label* label);
573 
574   // Branch with link to PC offset.
575   void bl(int64_t imm26);
576 
577   // Compare and branch to label if zero.
578   void cbz(const Register& rt, Label* label);
579 
580   // Compare and branch to PC offset if zero.
581   void cbz(const Register& rt, int64_t imm19);
582 
583   // Compare and branch to label if not zero.
584   void cbnz(const Register& rt, Label* label);
585 
586   // Compare and branch to PC offset if not zero.
587   void cbnz(const Register& rt, int64_t imm19);
588 
589   // Table lookup from one register.
590   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
591 
592   // Table lookup from two registers.
593   void tbl(const VRegister& vd,
594            const VRegister& vn,
595            const VRegister& vn2,
596            const VRegister& vm);
597 
598   // Table lookup from three registers.
599   void tbl(const VRegister& vd,
600            const VRegister& vn,
601            const VRegister& vn2,
602            const VRegister& vn3,
603            const VRegister& vm);
604 
605   // Table lookup from four registers.
606   void tbl(const VRegister& vd,
607            const VRegister& vn,
608            const VRegister& vn2,
609            const VRegister& vn3,
610            const VRegister& vn4,
611            const VRegister& vm);
612 
613   // Table lookup extension from one register.
614   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
615 
616   // Table lookup extension from two registers.
617   void tbx(const VRegister& vd,
618            const VRegister& vn,
619            const VRegister& vn2,
620            const VRegister& vm);
621 
622   // Table lookup extension from three registers.
623   void tbx(const VRegister& vd,
624            const VRegister& vn,
625            const VRegister& vn2,
626            const VRegister& vn3,
627            const VRegister& vm);
628 
629   // Table lookup extension from four registers.
630   void tbx(const VRegister& vd,
631            const VRegister& vn,
632            const VRegister& vn2,
633            const VRegister& vn3,
634            const VRegister& vn4,
635            const VRegister& vm);
636 
637   // Test bit and branch to label if zero.
638   void tbz(const Register& rt, unsigned bit_pos, Label* label);
639 
640   // Test bit and branch to PC offset if zero.
641   void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
642 
643   // Test bit and branch to label if not zero.
644   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
645 
646   // Test bit and branch to PC offset if not zero.
647   void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
648 
649   // Address calculation instructions.
650   // Calculate a PC-relative address. Unlike for branches the offset in adr is
651   // unscaled (i.e. the result can be unaligned).
652 
653   // Calculate the address of a label.
654   void adr(const Register& xd, Label* label);
655 
656   // Calculate the address of a PC offset.
657   void adr(const Register& xd, int64_t imm21);
658 
659   // Calculate the page address of a label.
660   void adrp(const Register& xd, Label* label);
661 
662   // Calculate the page address of a PC offset.
663   void adrp(const Register& xd, int64_t imm21);
664 
665   // Data Processing instructions.
666 
667   // Add.
668   void add(const Register& rd, const Register& rn, const Operand& operand);
669 
670   // Add and update status flags.
671   void adds(const Register& rd, const Register& rn, const Operand& operand);
672 
673   // Compare negative.
674   void cmn(const Register& rn, const Operand& operand);
675 
676   // Subtract.
677   void sub(const Register& rd, const Register& rn, const Operand& operand);
678 
679   // Subtract and update status flags.
680   void subs(const Register& rd, const Register& rn, const Operand& operand);
681 
682   // Compare.
683   void cmp(const Register& rn, const Operand& operand);
684 
685   // Negate.
686   void neg(const Register& rd, const Operand& operand);
687 
688   // Negate and update status flags.
689   void negs(const Register& rd, const Operand& operand);
690 
691   // Add with carry bit.
692   void adc(const Register& rd, const Register& rn, const Operand& operand);
693 
694   // Add with carry bit and update status flags.
695   void adcs(const Register& rd, const Register& rn, const Operand& operand);
696 
697   // Subtract with carry bit.
698   void sbc(const Register& rd, const Register& rn, const Operand& operand);
699 
700   // Subtract with carry bit and update status flags.
701   void sbcs(const Register& rd, const Register& rn, const Operand& operand);
702 
703   // Rotate register right and insert into NZCV flags under the control of a
704   // mask [Armv8.4].
705   void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
706 
707   // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
708   void setf8(const Register& rn);
709 
710   // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
711   void setf16(const Register& rn);
712 
713   // Negate with carry bit.
714   void ngc(const Register& rd, const Operand& operand);
715 
716   // Negate with carry bit and update status flags.
717   void ngcs(const Register& rd, const Operand& operand);
718 
719   // Logical instructions.
720 
721   // Bitwise and (A & B).
722   void and_(const Register& rd, const Register& rn, const Operand& operand);
723 
724   // Bitwise and (A & B) and update status flags.
725   void ands(const Register& rd, const Register& rn, const Operand& operand);
726 
727   // Bit test and set flags.
728   void tst(const Register& rn, const Operand& operand);
729 
730   // Bit clear (A & ~B).
731   void bic(const Register& rd, const Register& rn, const Operand& operand);
732 
733   // Bit clear (A & ~B) and update status flags.
734   void bics(const Register& rd, const Register& rn, const Operand& operand);
735 
736   // Bitwise or (A | B).
737   void orr(const Register& rd, const Register& rn, const Operand& operand);
738 
739   // Bitwise nor (A | ~B).
740   void orn(const Register& rd, const Register& rn, const Operand& operand);
741 
742   // Bitwise eor/xor (A ^ B).
743   void eor(const Register& rd, const Register& rn, const Operand& operand);
744 
745   // Bitwise enor/xnor (A ^ ~B).
746   void eon(const Register& rd, const Register& rn, const Operand& operand);
747 
748   // Logical shift left by variable.
749   void lslv(const Register& rd, const Register& rn, const Register& rm);
750 
751   // Logical shift right by variable.
752   void lsrv(const Register& rd, const Register& rn, const Register& rm);
753 
754   // Arithmetic shift right by variable.
755   void asrv(const Register& rd, const Register& rn, const Register& rm);
756 
757   // Rotate right by variable.
758   void rorv(const Register& rd, const Register& rn, const Register& rm);
759 
760   // Bitfield instructions.
761 
762   // Bitfield move.
763   void bfm(const Register& rd,
764            const Register& rn,
765            unsigned immr,
766            unsigned imms);
767 
768   // Signed bitfield move.
769   void sbfm(const Register& rd,
770             const Register& rn,
771             unsigned immr,
772             unsigned imms);
773 
774   // Unsigned bitfield move.
775   void ubfm(const Register& rd,
776             const Register& rn,
777             unsigned immr,
778             unsigned imms);
779 
780   // Bfm aliases.
781 
782   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)783   void bfi(const Register& rd,
784            const Register& rn,
785            unsigned lsb,
786            unsigned width) {
787     VIXL_ASSERT(width >= 1);
788     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
789     bfm(rd,
790         rn,
791         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
792         width - 1);
793   }
794 
795   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)796   void bfxil(const Register& rd,
797              const Register& rn,
798              unsigned lsb,
799              unsigned width) {
800     VIXL_ASSERT(width >= 1);
801     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
802     bfm(rd, rn, lsb, lsb + width - 1);
803   }
804 
805   // Bitfield clear [Armv8.2].
bfc(const Register & rd,unsigned lsb,unsigned width)806   void bfc(const Register& rd, unsigned lsb, unsigned width) {
807     bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
808   }
809 
810   // Sbfm aliases.
811 
812   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)813   void asr(const Register& rd, const Register& rn, unsigned shift) {
814     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
815     sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
816   }
817 
818   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)819   void sbfiz(const Register& rd,
820              const Register& rn,
821              unsigned lsb,
822              unsigned width) {
823     VIXL_ASSERT(width >= 1);
824     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
825     sbfm(rd,
826          rn,
827          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
828          width - 1);
829   }
830 
831   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)832   void sbfx(const Register& rd,
833             const Register& rn,
834             unsigned lsb,
835             unsigned width) {
836     VIXL_ASSERT(width >= 1);
837     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
838     sbfm(rd, rn, lsb, lsb + width - 1);
839   }
840 
841   // Signed extend byte.
sxtb(const Register & rd,const Register & rn)842   void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
843 
844   // Signed extend halfword.
sxth(const Register & rd,const Register & rn)845   void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
846 
847   // Signed extend word.
sxtw(const Register & rd,const Register & rn)848   void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
849 
850   // Ubfm aliases.
851 
852   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)853   void lsl(const Register& rd, const Register& rn, unsigned shift) {
854     unsigned reg_size = rd.GetSizeInBits();
855     VIXL_ASSERT(shift < reg_size);
856     // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
857     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
858   }
859 
860   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)861   void lsr(const Register& rd, const Register& rn, unsigned shift) {
862     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
863     ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
864   }
865 
866   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)867   void ubfiz(const Register& rd,
868              const Register& rn,
869              unsigned lsb,
870              unsigned width) {
871     VIXL_ASSERT(width >= 1);
872     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
873     ubfm(rd,
874          rn,
875          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
876          width - 1);
877   }
878 
879   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)880   void ubfx(const Register& rd,
881             const Register& rn,
882             unsigned lsb,
883             unsigned width) {
884     VIXL_ASSERT(width >= 1);
885     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
886     ubfm(rd, rn, lsb, lsb + width - 1);
887   }
888 
889   // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)890   void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
891 
892   // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)893   void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
894 
895   // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)896   void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
897 
898   // Extract.
899   void extr(const Register& rd,
900             const Register& rn,
901             const Register& rm,
902             unsigned lsb);
903 
904   // Conditional select: rd = cond ? rn : rm.
905   void csel(const Register& rd,
906             const Register& rn,
907             const Register& rm,
908             Condition cond);
909 
910   // Conditional select increment: rd = cond ? rn : rm + 1.
911   void csinc(const Register& rd,
912              const Register& rn,
913              const Register& rm,
914              Condition cond);
915 
916   // Conditional select inversion: rd = cond ? rn : ~rm.
917   void csinv(const Register& rd,
918              const Register& rn,
919              const Register& rm,
920              Condition cond);
921 
922   // Conditional select negation: rd = cond ? rn : -rm.
923   void csneg(const Register& rd,
924              const Register& rn,
925              const Register& rm,
926              Condition cond);
927 
928   // Conditional set: rd = cond ? 1 : 0.
929   void cset(const Register& rd, Condition cond);
930 
931   // Conditional set mask: rd = cond ? -1 : 0.
932   void csetm(const Register& rd, Condition cond);
933 
934   // Conditional increment: rd = cond ? rn + 1 : rn.
935   void cinc(const Register& rd, const Register& rn, Condition cond);
936 
937   // Conditional invert: rd = cond ? ~rn : rn.
938   void cinv(const Register& rd, const Register& rn, Condition cond);
939 
940   // Conditional negate: rd = cond ? -rn : rn.
941   void cneg(const Register& rd, const Register& rn, Condition cond);
942 
943   // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)944   void ror(const Register& rd, const Register& rs, unsigned shift) {
945     extr(rd, rs, rs, shift);
946   }
947 
948   // Conditional comparison.
949 
950   // Conditional compare negative.
951   void ccmn(const Register& rn,
952             const Operand& operand,
953             StatusFlags nzcv,
954             Condition cond);
955 
956   // Conditional compare.
957   void ccmp(const Register& rn,
958             const Operand& operand,
959             StatusFlags nzcv,
960             Condition cond);
961 
962   // CRC-32 checksum from byte.
963   void crc32b(const Register& wd, const Register& wn, const Register& wm);
964 
965   // CRC-32 checksum from half-word.
966   void crc32h(const Register& wd, const Register& wn, const Register& wm);
967 
968   // CRC-32 checksum from word.
969   void crc32w(const Register& wd, const Register& wn, const Register& wm);
970 
971   // CRC-32 checksum from double word.
972   void crc32x(const Register& wd, const Register& wn, const Register& xm);
973 
974   // CRC-32C checksum from byte.
975   void crc32cb(const Register& wd, const Register& wn, const Register& wm);
976 
977   // CRC-32C checksum from half-word.
978   void crc32ch(const Register& wd, const Register& wn, const Register& wm);
979 
980   // CRC-32C checksum from word.
981   void crc32cw(const Register& wd, const Register& wn, const Register& wm);
982 
983   // CRC-32C checksum from double word.
984   void crc32cx(const Register& wd, const Register& wn, const Register& xm);
985 
986   // Multiply.
987   void mul(const Register& rd, const Register& rn, const Register& rm);
988 
989   // Negated multiply.
990   void mneg(const Register& rd, const Register& rn, const Register& rm);
991 
992   // Signed long multiply: 32 x 32 -> 64-bit.
993   void smull(const Register& xd, const Register& wn, const Register& wm);
994 
995   // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
996   void smulh(const Register& xd, const Register& xn, const Register& xm);
997 
998   // Multiply and accumulate.
999   void madd(const Register& rd,
1000             const Register& rn,
1001             const Register& rm,
1002             const Register& ra);
1003 
1004   // Multiply and subtract.
1005   void msub(const Register& rd,
1006             const Register& rn,
1007             const Register& rm,
1008             const Register& ra);
1009 
1010   // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1011   void smaddl(const Register& xd,
1012               const Register& wn,
1013               const Register& wm,
1014               const Register& xa);
1015 
1016   // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1017   void umaddl(const Register& xd,
1018               const Register& wn,
1019               const Register& wm,
1020               const Register& xa);
1021 
1022   // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)1023   void umull(const Register& xd, const Register& wn, const Register& wm) {
1024     umaddl(xd, wn, wm, xzr);
1025   }
1026 
1027   // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1028   void umulh(const Register& xd, const Register& xn, const Register& xm);
1029 
1030   // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1031   void smsubl(const Register& xd,
1032               const Register& wn,
1033               const Register& wm,
1034               const Register& xa);
1035 
1036   // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1037   void umsubl(const Register& xd,
1038               const Register& wn,
1039               const Register& wm,
1040               const Register& xa);
1041 
1042   // Signed integer divide.
1043   void sdiv(const Register& rd, const Register& rn, const Register& rm);
1044 
1045   // Unsigned integer divide.
1046   void udiv(const Register& rd, const Register& rn, const Register& rm);
1047 
1048   // Bit reverse.
1049   void rbit(const Register& rd, const Register& rn);
1050 
1051   // Reverse bytes in 16-bit half words.
1052   void rev16(const Register& rd, const Register& rn);
1053 
1054   // Reverse bytes in 32-bit words.
1055   void rev32(const Register& xd, const Register& xn);
1056 
1057   // Reverse bytes in 64-bit general purpose register, an alias for rev
1058   // [Armv8.2].
rev64(const Register & xd,const Register & xn)1059   void rev64(const Register& xd, const Register& xn) {
1060     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1061     rev(xd, xn);
1062   }
1063 
1064   // Reverse bytes.
1065   void rev(const Register& rd, const Register& rn);
1066 
1067   // Count leading zeroes.
1068   void clz(const Register& rd, const Register& rn);
1069 
1070   // Count leading sign bits.
1071   void cls(const Register& rd, const Register& rn);
1072 
1073   // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1074   void pacia(const Register& xd, const Register& rn);
1075 
1076   // Pointer Authentication Code for Instruction address, using key A and a
1077   // modifier of zero [Armv8.3].
1078   void paciza(const Register& xd);
1079 
1080   // Pointer Authentication Code for Instruction address, using key A, with
1081   // address in x17 and modifier in x16 [Armv8.3].
1082   void pacia1716();
1083 
1084   // Pointer Authentication Code for Instruction address, using key A, with
1085   // address in LR and modifier in SP [Armv8.3].
1086   void paciasp();
1087 
1088   // Pointer Authentication Code for Instruction address, using key A, with
1089   // address in LR and a modifier of zero [Armv8.3].
1090   void paciaz();
1091 
1092   // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1093   void pacib(const Register& xd, const Register& xn);
1094 
1095   // Pointer Authentication Code for Instruction address, using key B and a
1096   // modifier of zero [Armv8.3].
1097   void pacizb(const Register& xd);
1098 
1099   // Pointer Authentication Code for Instruction address, using key B, with
1100   // address in x17 and modifier in x16 [Armv8.3].
1101   void pacib1716();
1102 
1103   // Pointer Authentication Code for Instruction address, using key B, with
1104   // address in LR and modifier in SP [Armv8.3].
1105   void pacibsp();
1106 
1107   // Pointer Authentication Code for Instruction address, using key B, with
1108   // address in LR and a modifier of zero [Armv8.3].
1109   void pacibz();
1110 
1111   // Pointer Authentication Code for Data address, using key A [Armv8.3].
1112   void pacda(const Register& xd, const Register& xn);
1113 
1114   // Pointer Authentication Code for Data address, using key A and a modifier of
1115   // zero [Armv8.3].
1116   void pacdza(const Register& xd);
1117 
1118   // Pointer Authentication Code for Data address, using key B [Armv8.3].
1119   void pacdb(const Register& xd, const Register& xn);
1120 
1121   // Pointer Authentication Code for Data address, using key B and a modifier of
1122   // zero [Armv8.3].
1123   void pacdzb(const Register& xd);
1124 
1125   // Pointer Authentication Code, using Generic key [Armv8.3].
1126   void pacga(const Register& xd, const Register& xn, const Register& xm);
1127 
1128   // Authenticate Instruction address, using key A [Armv8.3].
1129   void autia(const Register& xd, const Register& xn);
1130 
1131   // Authenticate Instruction address, using key A and a modifier of zero
1132   // [Armv8.3].
1133   void autiza(const Register& xd);
1134 
1135   // Authenticate Instruction address, using key A, with address in x17 and
1136   // modifier in x16 [Armv8.3].
1137   void autia1716();
1138 
1139   // Authenticate Instruction address, using key A, with address in LR and
1140   // modifier in SP [Armv8.3].
1141   void autiasp();
1142 
1143   // Authenticate Instruction address, using key A, with address in LR and a
1144   // modifier of zero [Armv8.3].
1145   void autiaz();
1146 
1147   // Authenticate Instruction address, using key B [Armv8.3].
1148   void autib(const Register& xd, const Register& xn);
1149 
1150   // Authenticate Instruction address, using key B and a modifier of zero
1151   // [Armv8.3].
1152   void autizb(const Register& xd);
1153 
1154   // Authenticate Instruction address, using key B, with address in x17 and
1155   // modifier in x16 [Armv8.3].
1156   void autib1716();
1157 
1158   // Authenticate Instruction address, using key B, with address in LR and
1159   // modifier in SP [Armv8.3].
1160   void autibsp();
1161 
1162   // Authenticate Instruction address, using key B, with address in LR and a
1163   // modifier of zero [Armv8.3].
1164   void autibz();
1165 
1166   // Authenticate Data address, using key A [Armv8.3].
1167   void autda(const Register& xd, const Register& xn);
1168 
1169   // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1170   void autdza(const Register& xd);
1171 
1172   // Authenticate Data address, using key B [Armv8.3].
1173   void autdb(const Register& xd, const Register& xn);
1174 
1175   // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1176   void autdzb(const Register& xd);
1177 
1178   // Strip Pointer Authentication Code of Data address [Armv8.3].
1179   void xpacd(const Register& xd);
1180 
1181   // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1182   void xpaci(const Register& xd);
1183 
1184   // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1185   void xpaclri();
1186 
1187   // Memory instructions.
1188 
1189   // Load integer or FP register.
1190   void ldr(const CPURegister& rt,
1191            const MemOperand& src,
1192            LoadStoreScalingOption option = PreferScaledOffset);
1193 
1194   // Store integer or FP register.
1195   void str(const CPURegister& rt,
1196            const MemOperand& dst,
1197            LoadStoreScalingOption option = PreferScaledOffset);
1198 
1199   // Load word with sign extension.
1200   void ldrsw(const Register& xt,
1201              const MemOperand& src,
1202              LoadStoreScalingOption option = PreferScaledOffset);
1203 
1204   // Load byte.
1205   void ldrb(const Register& rt,
1206             const MemOperand& src,
1207             LoadStoreScalingOption option = PreferScaledOffset);
1208 
1209   // Store byte.
1210   void strb(const Register& rt,
1211             const MemOperand& dst,
1212             LoadStoreScalingOption option = PreferScaledOffset);
1213 
1214   // Load byte with sign extension.
1215   void ldrsb(const Register& rt,
1216              const MemOperand& src,
1217              LoadStoreScalingOption option = PreferScaledOffset);
1218 
1219   // Load half-word.
1220   void ldrh(const Register& rt,
1221             const MemOperand& src,
1222             LoadStoreScalingOption option = PreferScaledOffset);
1223 
1224   // Store half-word.
1225   void strh(const Register& rt,
1226             const MemOperand& dst,
1227             LoadStoreScalingOption option = PreferScaledOffset);
1228 
1229   // Load half-word with sign extension.
1230   void ldrsh(const Register& rt,
1231              const MemOperand& src,
1232              LoadStoreScalingOption option = PreferScaledOffset);
1233 
1234   // Load integer or FP register (with unscaled offset).
1235   void ldur(const CPURegister& rt,
1236             const MemOperand& src,
1237             LoadStoreScalingOption option = PreferUnscaledOffset);
1238 
1239   // Store integer or FP register (with unscaled offset).
1240   void stur(const CPURegister& rt,
1241             const MemOperand& src,
1242             LoadStoreScalingOption option = PreferUnscaledOffset);
1243 
1244   // Load word with sign extension.
1245   void ldursw(const Register& xt,
1246               const MemOperand& src,
1247               LoadStoreScalingOption option = PreferUnscaledOffset);
1248 
1249   // Load byte (with unscaled offset).
1250   void ldurb(const Register& rt,
1251              const MemOperand& src,
1252              LoadStoreScalingOption option = PreferUnscaledOffset);
1253 
1254   // Store byte (with unscaled offset).
1255   void sturb(const Register& rt,
1256              const MemOperand& dst,
1257              LoadStoreScalingOption option = PreferUnscaledOffset);
1258 
1259   // Load byte with sign extension (and unscaled offset).
1260   void ldursb(const Register& rt,
1261               const MemOperand& src,
1262               LoadStoreScalingOption option = PreferUnscaledOffset);
1263 
1264   // Load half-word (with unscaled offset).
1265   void ldurh(const Register& rt,
1266              const MemOperand& src,
1267              LoadStoreScalingOption option = PreferUnscaledOffset);
1268 
1269   // Store half-word (with unscaled offset).
1270   void sturh(const Register& rt,
1271              const MemOperand& dst,
1272              LoadStoreScalingOption option = PreferUnscaledOffset);
1273 
1274   // Load half-word with sign extension (and unscaled offset).
1275   void ldursh(const Register& rt,
1276               const MemOperand& src,
1277               LoadStoreScalingOption option = PreferUnscaledOffset);
1278 
1279   // Load double-word with pointer authentication, using data key A and a
1280   // modifier of zero [Armv8.3].
1281   void ldraa(const Register& xt, const MemOperand& src);
1282 
1283   // Load double-word with pointer authentication, using data key B and a
1284   // modifier of zero [Armv8.3].
1285   void ldrab(const Register& xt, const MemOperand& src);
1286 
1287   // Load integer or FP register pair.
1288   void ldp(const CPURegister& rt,
1289            const CPURegister& rt2,
1290            const MemOperand& src);
1291 
1292   // Store integer or FP register pair.
1293   void stp(const CPURegister& rt,
1294            const CPURegister& rt2,
1295            const MemOperand& dst);
1296 
1297   // Load word pair with sign extension.
1298   void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);
1299 
1300   // Load integer or FP register pair, non-temporal.
1301   void ldnp(const CPURegister& rt,
1302             const CPURegister& rt2,
1303             const MemOperand& src);
1304 
1305   // Store integer or FP register pair, non-temporal.
1306   void stnp(const CPURegister& rt,
1307             const CPURegister& rt2,
1308             const MemOperand& dst);
1309 
1310   // Load integer or FP register from literal pool.
1311   void ldr(const CPURegister& rt, RawLiteral* literal);
1312 
1313   // Load word with sign extension from literal pool.
1314   void ldrsw(const Register& xt, RawLiteral* literal);
1315 
1316   // Load integer or FP register from pc + imm19 << 2.
1317   void ldr(const CPURegister& rt, int64_t imm19);
1318 
1319   // Load word with sign extension from pc + imm19 << 2.
1320   void ldrsw(const Register& xt, int64_t imm19);
1321 
1322   // Store exclusive byte.
1323   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1324 
1325   // Store exclusive half-word.
1326   void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1327 
1328   // Store exclusive register.
1329   void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1330 
1331   // Load exclusive byte.
1332   void ldxrb(const Register& rt, const MemOperand& src);
1333 
1334   // Load exclusive half-word.
1335   void ldxrh(const Register& rt, const MemOperand& src);
1336 
1337   // Load exclusive register.
1338   void ldxr(const Register& rt, const MemOperand& src);
1339 
1340   // Store exclusive register pair.
1341   void stxp(const Register& rs,
1342             const Register& rt,
1343             const Register& rt2,
1344             const MemOperand& dst);
1345 
1346   // Load exclusive register pair.
1347   void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1348 
1349   // Store-release exclusive byte.
1350   void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1351 
1352   // Store-release exclusive half-word.
1353   void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1354 
1355   // Store-release exclusive register.
1356   void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1357 
1358   // Load-acquire exclusive byte.
1359   void ldaxrb(const Register& rt, const MemOperand& src);
1360 
1361   // Load-acquire exclusive half-word.
1362   void ldaxrh(const Register& rt, const MemOperand& src);
1363 
1364   // Load-acquire exclusive register.
1365   void ldaxr(const Register& rt, const MemOperand& src);
1366 
1367   // Store-release exclusive register pair.
1368   void stlxp(const Register& rs,
1369              const Register& rt,
1370              const Register& rt2,
1371              const MemOperand& dst);
1372 
1373   // Load-acquire exclusive register pair.
1374   void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1375 
1376   // Store-release byte.
1377   void stlrb(const Register& rt, const MemOperand& dst);
1378 
1379   // Store-release half-word.
1380   void stlrh(const Register& rt, const MemOperand& dst);
1381 
1382   // Store-release register.
1383   void stlr(const Register& rt, const MemOperand& dst);
1384 
1385   // Load-acquire byte.
1386   void ldarb(const Register& rt, const MemOperand& src);
1387 
1388   // Load-acquire half-word.
1389   void ldarh(const Register& rt, const MemOperand& src);
1390 
1391   // Load-acquire register.
1392   void ldar(const Register& rt, const MemOperand& src);
1393 
1394   // Store LORelease byte [Armv8.1].
1395   void stllrb(const Register& rt, const MemOperand& dst);
1396 
1397   // Store LORelease half-word [Armv8.1].
1398   void stllrh(const Register& rt, const MemOperand& dst);
1399 
1400   // Store LORelease register [Armv8.1].
1401   void stllr(const Register& rt, const MemOperand& dst);
1402 
1403   // Load LOAcquire byte [Armv8.1].
1404   void ldlarb(const Register& rt, const MemOperand& src);
1405 
1406   // Load LOAcquire half-word [Armv8.1].
1407   void ldlarh(const Register& rt, const MemOperand& src);
1408 
1409   // Load LOAcquire register [Armv8.1].
1410   void ldlar(const Register& rt, const MemOperand& src);
1411 
1412   // Compare and Swap word or doubleword in memory [Armv8.1].
1413   void cas(const Register& rs, const Register& rt, const MemOperand& src);
1414 
1415   // Compare and Swap word or doubleword in memory [Armv8.1].
1416   void casa(const Register& rs, const Register& rt, const MemOperand& src);
1417 
1418   // Compare and Swap word or doubleword in memory [Armv8.1].
1419   void casl(const Register& rs, const Register& rt, const MemOperand& src);
1420 
1421   // Compare and Swap word or doubleword in memory [Armv8.1].
1422   void casal(const Register& rs, const Register& rt, const MemOperand& src);
1423 
1424   // Compare and Swap byte in memory [Armv8.1].
1425   void casb(const Register& rs, const Register& rt, const MemOperand& src);
1426 
1427   // Compare and Swap byte in memory [Armv8.1].
1428   void casab(const Register& rs, const Register& rt, const MemOperand& src);
1429 
1430   // Compare and Swap byte in memory [Armv8.1].
1431   void caslb(const Register& rs, const Register& rt, const MemOperand& src);
1432 
1433   // Compare and Swap byte in memory [Armv8.1].
1434   void casalb(const Register& rs, const Register& rt, const MemOperand& src);
1435 
1436   // Compare and Swap halfword in memory [Armv8.1].
1437   void cash(const Register& rs, const Register& rt, const MemOperand& src);
1438 
1439   // Compare and Swap halfword in memory [Armv8.1].
1440   void casah(const Register& rs, const Register& rt, const MemOperand& src);
1441 
1442   // Compare and Swap halfword in memory [Armv8.1].
1443   void caslh(const Register& rs, const Register& rt, const MemOperand& src);
1444 
1445   // Compare and Swap halfword in memory [Armv8.1].
1446   void casalh(const Register& rs, const Register& rt, const MemOperand& src);
1447 
1448   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1449   void casp(const Register& rs,
1450             const Register& rs2,
1451             const Register& rt,
1452             const Register& rt2,
1453             const MemOperand& src);
1454 
1455   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1456   void caspa(const Register& rs,
1457              const Register& rs2,
1458              const Register& rt,
1459              const Register& rt2,
1460              const MemOperand& src);
1461 
1462   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1463   void caspl(const Register& rs,
1464              const Register& rs2,
1465              const Register& rt,
1466              const Register& rt2,
1467              const MemOperand& src);
1468 
1469   // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
1470   void caspal(const Register& rs,
1471               const Register& rs2,
1472               const Register& rt,
1473               const Register& rt2,
1474               const MemOperand& src);
1475 
1476   // Store-release byte (with unscaled offset) [Armv8.4].
1477   void stlurb(const Register& rt, const MemOperand& dst);
1478 
1479   // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
1480   void ldapurb(const Register& rt, const MemOperand& src);
1481 
1482   // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
1483   void ldapursb(const Register& rt, const MemOperand& src);
1484 
1485   // Store-release half-word (with unscaled offset) [Armv8.4].
1486   void stlurh(const Register& rt, const MemOperand& dst);
1487 
1488   // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
1489   void ldapurh(const Register& rt, const MemOperand& src);
1490 
1491   // Load-acquire RCpc Register signed half-word (with unscaled offset)
1492   // [Armv8.4].
1493   void ldapursh(const Register& rt, const MemOperand& src);
1494 
1495   // Store-release word or double-word (with unscaled offset) [Armv8.4].
1496   void stlur(const Register& rt, const MemOperand& dst);
1497 
1498   // Load-acquire RCpc Register word or double-word (with unscaled offset)
1499   // [Armv8.4].
1500   void ldapur(const Register& rt, const MemOperand& src);
1501 
1502   // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
1503   void ldapursw(const Register& xt, const MemOperand& src);
1504 
1505   // Atomic add on byte in memory [Armv8.1]
1506   void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1507 
1508   // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1509   void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1510 
1511   // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1512   void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1513 
1514   // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1515   // [Armv8.1]
1516   void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1517 
1518   // Atomic add on halfword in memory [Armv8.1]
1519   void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1520 
1521   // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1522   void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1523 
1524   // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1525   void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1526 
1527   // Atomic add on halfword in memory, with Load-acquire and Store-release
1528   // semantics [Armv8.1]
1529   void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1530 
1531   // Atomic add on word or doubleword in memory [Armv8.1]
1532   void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1533 
1534   // Atomic add on word or doubleword in memory, with Load-acquire semantics
1535   // [Armv8.1]
1536   void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1537 
1538   // Atomic add on word or doubleword in memory, with Store-release semantics
1539   // [Armv8.1]
1540   void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1541 
1542   // Atomic add on word or doubleword in memory, with Load-acquire and
1543   // Store-release semantics [Armv8.1]
1544   void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1545 
1546   // Atomic bit clear on byte in memory [Armv8.1]
1547   void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1548 
1549   // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1550   void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1551 
1552   // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1553   void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1554 
1555   // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1556   // semantics [Armv8.1]
1557   void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1558 
1559   // Atomic bit clear on halfword in memory [Armv8.1]
1560   void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1561 
1562   // Atomic bit clear on halfword in memory, with Load-acquire semantics
1563   // [Armv8.1]
1564   void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1565 
1566   // Atomic bit clear on halfword in memory, with Store-release semantics
1567   // [Armv8.1]
1568   void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1569 
1570   // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
1571   // semantics [Armv8.1]
1572   void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
1573 
1574   // Atomic bit clear on word or doubleword in memory [Armv8.1]
1575   void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
1576 
1577   // Atomic bit clear on word or doubleword in memory, with Load-acquire
1578   // semantics [Armv8.1]
1579   void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
1580 
1581   // Atomic bit clear on word or doubleword in memory, with Store-release
1582   // semantics [Armv8.1]
1583   void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
1584 
1585   // Atomic bit clear on word or doubleword in memory, with Load-acquire and
1586   // Store-release semantics [Armv8.1]
1587   void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
1588 
1589   // Atomic exclusive OR on byte in memory [Armv8.1]
1590   void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
1591 
1592   // Atomic exclusive OR on byte in memory, with Load-acquire semantics
1593   // [Armv8.1]
1594   void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
1595 
1596   // Atomic exclusive OR on byte in memory, with Store-release semantics
1597   // [Armv8.1]
1598   void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
1599 
1600   // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
1601   // semantics [Armv8.1]
1602   void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
1603 
1604   // Atomic exclusive OR on halfword in memory [Armv8.1]
1605   void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
1606 
1607   // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
1608   // [Armv8.1]
1609   void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
1610 
1611   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1612   // [Armv8.1]
1613   void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
1614 
1615   // Atomic exclusive OR on halfword in memory, with Load-acquire and
1616   // Store-release semantics [Armv8.1]
1617   void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
1618 
1619   // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
1620   void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
1621 
1622   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
1623   // semantics [Armv8.1]
1624   void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
1625 
1626   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1627   // semantics [Armv8.1]
1628   void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
1629 
1630   // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
1631   // Store-release semantics [Armv8.1]
1632   void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1633 
1634   // Atomic bit set on byte in memory [Armv8.1]
1635   void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1636 
1637   // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1638   void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1639 
1640   // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1641   void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1642 
1643   // Atomic bit set on byte in memory, with Load-acquire and Store-release
1644   // semantics [Armv8.1]
1645   void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1646 
1647   // Atomic bit set on halfword in memory [Armv8.1]
1648   void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1649 
1650   // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1651   void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1652 
1653   // Atomic bit set on halfword in memory, with Store-release semantics
1654   // [Armv8.1]
1655   void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1656 
1657   // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1658   // semantics [Armv8.1]
1659   void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1660 
1661   // Atomic bit set on word or doubleword in memory [Armv8.1]
1662   void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1663 
1664   // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1665   // [Armv8.1]
1666   void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1667 
1668   // Atomic bit set on word or doubleword in memory, with Store-release
1669   // semantics [Armv8.1]
1670   void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1671 
1672   // Atomic bit set on word or doubleword in memory, with Load-acquire and
1673   // Store-release semantics [Armv8.1]
1674   void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1675 
1676   // Atomic signed maximum on byte in memory [Armv8.1]
1677   void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1678 
1679   // Atomic signed maximum on byte in memory, with Load-acquire semantics
1680   // [Armv8.1]
1681   void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1682 
1683   // Atomic signed maximum on byte in memory, with Store-release semantics
1684   // [Armv8.1]
1685   void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1686 
1687   // Atomic signed maximum on byte in memory, with Load-acquire and
1688   // Store-release semantics [Armv8.1]
1689   void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1690 
1691   // Atomic signed maximum on halfword in memory [Armv8.1]
1692   void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1693 
1694   // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1695   // [Armv8.1]
1696   void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1697 
1698   // Atomic signed maximum on halfword in memory, with Store-release semantics
1699   // [Armv8.1]
1700   void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1701 
1702   // Atomic signed maximum on halfword in memory, with Load-acquire and
1703   // Store-release semantics [Armv8.1]
1704   void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1705 
1706   // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1707   void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1708 
1709   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1710   // semantics [Armv8.1]
1711   void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1712 
1713   // Atomic signed maximum on word or doubleword in memory, with Store-release
1714   // semantics [Armv8.1]
1715   void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1716 
1717   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1718   // and Store-release semantics [Armv8.1]
1719   void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1720 
1721   // Atomic signed minimum on byte in memory [Armv8.1]
1722   void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1723 
1724   // Atomic signed minimum on byte in memory, with Load-acquire semantics
1725   // [Armv8.1]
1726   void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1727 
1728   // Atomic signed minimum on byte in memory, with Store-release semantics
1729   // [Armv8.1]
1730   void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1731 
1732   // Atomic signed minimum on byte in memory, with Load-acquire and
1733   // Store-release semantics [Armv8.1]
1734   void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1735 
1736   // Atomic signed minimum on halfword in memory [Armv8.1]
1737   void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1738 
1739   // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1740   // [Armv8.1]
1741   void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1742 
1743   // Atomic signed minimum on halfword in memory, with Store-release semantics
1744   // [Armv8.1]
1745   void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1746 
1747   // Atomic signed minimum on halfword in memory, with Load-acquire and
1748   // Store-release semantics [Armv8.1]
1749   void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1750 
1751   // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1752   void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1753 
1754   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1755   // semantics [Armv8.1]
1756   void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1757 
1758   // Atomic signed minimum on word or doubleword in memory, with Store-release
1759   // semantics [Armv8.1]
1760   void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1761 
1762   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1763   // and Store-release semantics [Armv8.1]
1764   void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1765 
1766   // Atomic unsigned maximum on byte in memory [Armv8.1]
1767   void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1768 
1769   // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1770   // [Armv8.1]
1771   void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1772 
1773   // Atomic unsigned maximum on byte in memory, with Store-release semantics
1774   // [Armv8.1]
1775   void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1776 
1777   // Atomic unsigned maximum on byte in memory, with Load-acquire and
1778   // Store-release semantics [Armv8.1]
1779   void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1780 
1781   // Atomic unsigned maximum on halfword in memory [Armv8.1]
1782   void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1783 
1784   // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1785   // [Armv8.1]
1786   void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1787 
1788   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1789   // [Armv8.1]
1790   void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1791 
1792   // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1793   // Store-release semantics [Armv8.1]
1794   void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1795 
1796   // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1797   void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1798 
1799   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1800   // semantics [Armv8.1]
1801   void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1802 
1803   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1804   // semantics [Armv8.1]
1805   void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1806 
1807   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1808   // and Store-release semantics [Armv8.1]
1809   void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1810 
1811   // Atomic unsigned minimum on byte in memory [Armv8.1]
1812   void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1813 
1814   // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1815   // [Armv8.1]
1816   void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1817 
1818   // Atomic unsigned minimum on byte in memory, with Store-release semantics
1819   // [Armv8.1]
1820   void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1821 
1822   // Atomic unsigned minimum on byte in memory, with Load-acquire and
1823   // Store-release semantics [Armv8.1]
1824   void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1825 
1826   // Atomic unsigned minimum on halfword in memory [Armv8.1]
1827   void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1828 
1829   // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1830   // [Armv8.1]
1831   void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1832 
1833   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1834   // [Armv8.1]
1835   void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1836 
1837   // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1838   // Store-release semantics [Armv8.1]
1839   void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1840 
1841   // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1842   void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1843 
1844   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1845   // semantics [Armv8.1]
1846   void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1847 
1848   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1849   // semantics [Armv8.1]
1850   void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1851 
1852   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1853   // and Store-release semantics [Armv8.1]
1854   void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1855 
1856   // Atomic add on byte in memory, without return. [Armv8.1]
1857   void staddb(const Register& rs, const MemOperand& src);
1858 
1859   // Atomic add on byte in memory, with Store-release semantics and without
1860   // return. [Armv8.1]
1861   void staddlb(const Register& rs, const MemOperand& src);
1862 
1863   // Atomic add on halfword in memory, without return. [Armv8.1]
1864   void staddh(const Register& rs, const MemOperand& src);
1865 
1866   // Atomic add on halfword in memory, with Store-release semantics and without
1867   // return. [Armv8.1]
1868   void staddlh(const Register& rs, const MemOperand& src);
1869 
1870   // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1871   void stadd(const Register& rs, const MemOperand& src);
1872 
1873   // Atomic add on word or doubleword in memory, with Store-release semantics
1874   // and without return. [Armv8.1]
1875   void staddl(const Register& rs, const MemOperand& src);
1876 
1877   // Atomic bit clear on byte in memory, without return. [Armv8.1]
1878   void stclrb(const Register& rs, const MemOperand& src);
1879 
1880   // Atomic bit clear on byte in memory, with Store-release semantics and
1881   // without return. [Armv8.1]
1882   void stclrlb(const Register& rs, const MemOperand& src);
1883 
1884   // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1885   void stclrh(const Register& rs, const MemOperand& src);
1886 
1887   // Atomic bit clear on halfword in memory, with Store-release semantics and
1888   // without return. [Armv8.1]
1889   void stclrlh(const Register& rs, const MemOperand& src);
1890 
1891   // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1892   void stclr(const Register& rs, const MemOperand& src);
1893 
1894   // Atomic bit clear on word or doubleword in memory, with Store-release
1895   // semantics and without return. [Armv8.1]
1896   void stclrl(const Register& rs, const MemOperand& src);
1897 
1898   // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1899   void steorb(const Register& rs, const MemOperand& src);
1900 
1901   // Atomic exclusive OR on byte in memory, with Store-release semantics and
1902   // without return. [Armv8.1]
1903   void steorlb(const Register& rs, const MemOperand& src);
1904 
1905   // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1906   void steorh(const Register& rs, const MemOperand& src);
1907 
1908   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1909   // and without return. [Armv8.1]
1910   void steorlh(const Register& rs, const MemOperand& src);
1911 
1912   // Atomic exclusive OR on word or doubleword in memory, without return.
1913   // [Armv8.1]
1914   void steor(const Register& rs, const MemOperand& src);
1915 
1916   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1917   // semantics and without return. [Armv8.1]
1918   void steorl(const Register& rs, const MemOperand& src);
1919 
1920   // Atomic bit set on byte in memory, without return. [Armv8.1]
1921   void stsetb(const Register& rs, const MemOperand& src);
1922 
1923   // Atomic bit set on byte in memory, with Store-release semantics and without
1924   // return. [Armv8.1]
1925   void stsetlb(const Register& rs, const MemOperand& src);
1926 
1927   // Atomic bit set on halfword in memory, without return. [Armv8.1]
1928   void stseth(const Register& rs, const MemOperand& src);
1929 
1930   // Atomic bit set on halfword in memory, with Store-release semantics and
1931   // without return. [Armv8.1]
1932   void stsetlh(const Register& rs, const MemOperand& src);
1933 
1934   // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1935   void stset(const Register& rs, const MemOperand& src);
1936 
1937   // Atomic bit set on word or doubleword in memory, with Store-release
1938   // semantics and without return. [Armv8.1]
1939   void stsetl(const Register& rs, const MemOperand& src);
1940 
1941   // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1942   void stsmaxb(const Register& rs, const MemOperand& src);
1943 
1944   // Atomic signed maximum on byte in memory, with Store-release semantics and
1945   // without return. [Armv8.1]
1946   void stsmaxlb(const Register& rs, const MemOperand& src);
1947 
1948   // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1949   void stsmaxh(const Register& rs, const MemOperand& src);
1950 
1951   // Atomic signed maximum on halfword in memory, with Store-release semantics
1952   // and without return. [Armv8.1]
1953   void stsmaxlh(const Register& rs, const MemOperand& src);
1954 
1955   // Atomic signed maximum on word or doubleword in memory, without return.
1956   // [Armv8.1]
1957   void stsmax(const Register& rs, const MemOperand& src);
1958 
1959   // Atomic signed maximum on word or doubleword in memory, with Store-release
1960   // semantics and without return. [Armv8.1]
1961   void stsmaxl(const Register& rs, const MemOperand& src);
1962 
1963   // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1964   void stsminb(const Register& rs, const MemOperand& src);
1965 
1966   // Atomic signed minimum on byte in memory, with Store-release semantics and
1967   // without return. [Armv8.1]
1968   void stsminlb(const Register& rs, const MemOperand& src);
1969 
1970   // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1971   void stsminh(const Register& rs, const MemOperand& src);
1972 
1973   // Atomic signed minimum on halfword in memory, with Store-release semantics
1974   // and without return. [Armv8.1]
1975   void stsminlh(const Register& rs, const MemOperand& src);
1976 
1977   // Atomic signed minimum on word or doubleword in memory, without return.
1978   // [Armv8.1]
1979   void stsmin(const Register& rs, const MemOperand& src);
1980 
1981   // Atomic signed minimum on word or doubleword in memory, with Store-release
1982   // semantics and without return. [Armv8.1]
1983   void stsminl(const Register& rs, const MemOperand& src);
1984 
1985   // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1986   void stumaxb(const Register& rs, const MemOperand& src);
1987 
1988   // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1989   // without return. [Armv8.1]
1990   void stumaxlb(const Register& rs, const MemOperand& src);
1991 
1992   // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1993   void stumaxh(const Register& rs, const MemOperand& src);
1994 
1995   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1996   // and without return. [Armv8.1]
1997   void stumaxlh(const Register& rs, const MemOperand& src);
1998 
1999   // Atomic unsigned maximum on word or doubleword in memory, without return.
2000   // [Armv8.1]
2001   void stumax(const Register& rs, const MemOperand& src);
2002 
2003   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
2004   // semantics and without return. [Armv8.1]
2005   void stumaxl(const Register& rs, const MemOperand& src);
2006 
2007   // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
2008   void stuminb(const Register& rs, const MemOperand& src);
2009 
2010   // Atomic unsigned minimum on byte in memory, with Store-release semantics and
2011   // without return. [Armv8.1]
2012   void stuminlb(const Register& rs, const MemOperand& src);
2013 
2014   // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
2015   void stuminh(const Register& rs, const MemOperand& src);
2016 
2017   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
2018   // and without return. [Armv8.1]
2019   void stuminlh(const Register& rs, const MemOperand& src);
2020 
2021   // Atomic unsigned minimum on word or doubleword in memory, without return.
2022   // [Armv8.1]
2023   void stumin(const Register& rs, const MemOperand& src);
2024 
2025   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2026   // semantics and without return. [Armv8.1]
2027   void stuminl(const Register& rs, const MemOperand& src);
2028 
2029   // Swap byte in memory [Armv8.1]
2030   void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2031 
2032   // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2033   void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2034 
2035   // Swap byte in memory, with Store-release semantics [Armv8.1]
2036   void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2037 
2038   // Swap byte in memory, with Load-acquire and Store-release semantics
2039   // [Armv8.1]
2040   void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2041 
2042   // Swap halfword in memory [Armv8.1]
2043   void swph(const Register& rs, const Register& rt, const MemOperand& src);
2044 
2045   // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2046   void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2047 
2048   // Swap halfword in memory, with Store-release semantics [Armv8.1]
2049   void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2050 
2051   // Swap halfword in memory, with Load-acquire and Store-release semantics
2052   // [Armv8.1]
2053   void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2054 
2055   // Swap word or doubleword in memory [Armv8.1]
2056   void swp(const Register& rs, const Register& rt, const MemOperand& src);
2057 
2058   // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2059   void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2060 
2061   // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2062   void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2063 
2064   // Swap word or doubleword in memory, with Load-acquire and Store-release
2065   // semantics [Armv8.1]
2066   void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2067 
2068   // Load-Acquire RCpc Register byte [Armv8.3]
2069   void ldaprb(const Register& rt, const MemOperand& src);
2070 
2071   // Load-Acquire RCpc Register halfword [Armv8.3]
2072   void ldaprh(const Register& rt, const MemOperand& src);
2073 
2074   // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2075   void ldapr(const Register& rt, const MemOperand& src);
2076 
2077   // Prefetch memory.
2078   void prfm(PrefetchOperation op,
2079             const MemOperand& addr,
2080             LoadStoreScalingOption option = PreferScaledOffset);
2081 
2082   // Prefetch memory (with unscaled offset).
2083   void prfum(PrefetchOperation op,
2084              const MemOperand& addr,
2085              LoadStoreScalingOption option = PreferUnscaledOffset);
2086 
2087   // Prefetch memory in the literal pool.
2088   void prfm(PrefetchOperation op, RawLiteral* literal);
2089 
2090   // Prefetch from pc + imm19 << 2.
2091   void prfm(PrefetchOperation op, int64_t imm19);
2092 
2093   // Prefetch memory (allowing unallocated hints).
2094   void prfm(int op,
2095             const MemOperand& addr,
2096             LoadStoreScalingOption option = PreferScaledOffset);
2097 
2098   // Prefetch memory (with unscaled offset, allowing unallocated hints).
2099   void prfum(int op,
2100              const MemOperand& addr,
2101              LoadStoreScalingOption option = PreferUnscaledOffset);
2102 
2103   // Prefetch memory in the literal pool (allowing unallocated hints).
2104   void prfm(int op, RawLiteral* literal);
2105 
2106   // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2107   void prfm(int op, int64_t imm19);
2108 
2109   // Move instructions. The default shift of -1 indicates that the move
2110   // instruction will calculate an appropriate 16-bit immediate and left shift
2111   // that is equal to the 64-bit immediate argument. If an explicit left shift
2112   // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2113   //
2114   // For movk, an explicit shift can be used to indicate which half word should
2115   // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
2116   // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2117   // most-significant.
2118 
2119   // Move immediate and keep.
2120   void movk(const Register& rd, uint64_t imm, int shift = -1) {
2121     MoveWide(rd, imm, shift, MOVK);
2122   }
2123 
2124   // Move inverted immediate.
2125   void movn(const Register& rd, uint64_t imm, int shift = -1) {
2126     MoveWide(rd, imm, shift, MOVN);
2127   }
2128 
2129   // Move immediate.
2130   void movz(const Register& rd, uint64_t imm, int shift = -1) {
2131     MoveWide(rd, imm, shift, MOVZ);
2132   }
2133 
2134   // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2135   void mov(const Register& rd, uint64_t imm) {
2136     if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2137       VIXL_UNIMPLEMENTED();
2138     }
2139   }
2140 
2141   // Misc instructions.
2142 
2143   // Monitor debug-mode breakpoint.
2144   void brk(int code);
2145 
2146   // Halting debug-mode breakpoint.
2147   void hlt(int code);
2148 
2149   // Generate exception targeting EL1.
2150   void svc(int code);
2151 
2152   // Generate undefined instruction exception.
2153   void udf(int code);
2154 
2155   // Move register to register.
2156   void mov(const Register& rd, const Register& rn);
2157 
2158   // Move inverted operand to register.
2159   void mvn(const Register& rd, const Operand& operand);
2160 
2161   // System instructions.
2162 
2163   // Move to register from system register.
2164   void mrs(const Register& xt, SystemRegister sysreg);
2165 
2166   // Move from register to system register.
2167   void msr(SystemRegister sysreg, const Register& xt);
2168 
2169   // Invert carry flag [Armv8.4].
2170   void cfinv();
2171 
2172   // Convert floating-point condition flags from alternative format to Arm
2173   // format [Armv8.5].
2174   void xaflag();
2175 
2176   // Convert floating-point condition flags from Arm format to alternative
2177   // format [Armv8.5].
2178   void axflag();
2179 
2180   // System instruction.
2181   void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2182 
2183   // System instruction with pre-encoded op (op1:crn:crm:op2).
2184   void sys(int op, const Register& xt = xzr);
2185 
2186   // System data cache operation.
2187   void dc(DataCacheOp op, const Register& rt);
2188 
2189   // System instruction cache operation.
2190   void ic(InstructionCacheOp op, const Register& rt);
2191 
2192   // System hint (named type).
2193   void hint(SystemHint code);
2194 
2195   // System hint (numbered type).
2196   void hint(int imm7);
2197 
2198   // Clear exclusive monitor.
2199   void clrex(int imm4 = 0xf);
2200 
2201   // Data memory barrier.
2202   void dmb(BarrierDomain domain, BarrierType type);
2203 
2204   // Data synchronization barrier.
2205   void dsb(BarrierDomain domain, BarrierType type);
2206 
2207   // Instruction synchronization barrier.
2208   void isb();
2209 
2210   // Error synchronization barrier.
2211   void esb();
2212 
2213   // Conditional speculation dependency barrier.
2214   void csdb();
2215 
2216   // No-op.
nop()2217   void nop() { hint(NOP); }
2218 
2219   // Branch target identification.
2220   void bti(BranchTargetIdentifier id);
2221 
2222   // FP and NEON instructions.
2223 
2224   // Move double precision immediate to FP register.
2225   void fmov(const VRegister& vd, double imm);
2226 
2227   // Move single precision immediate to FP register.
2228   void fmov(const VRegister& vd, float imm);
2229 
2230   // Move half precision immediate to FP register [Armv8.2].
2231   void fmov(const VRegister& vd, Float16 imm);
2232 
2233   // Move FP register to register.
2234   void fmov(const Register& rd, const VRegister& fn);
2235 
2236   // Move register to FP register.
2237   void fmov(const VRegister& vd, const Register& rn);
2238 
2239   // Move FP register to FP register.
2240   void fmov(const VRegister& vd, const VRegister& fn);
2241 
2242   // Move 64-bit register to top half of 128-bit FP register.
2243   void fmov(const VRegister& vd, int index, const Register& rn);
2244 
2245   // Move top half of 128-bit FP register to 64-bit register.
2246   void fmov(const Register& rd, const VRegister& vn, int index);
2247 
2248   // FP add.
2249   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2250 
2251   // FP subtract.
2252   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2253 
2254   // FP multiply.
2255   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2256 
2257   // FP fused multiply-add.
2258   void fmadd(const VRegister& vd,
2259              const VRegister& vn,
2260              const VRegister& vm,
2261              const VRegister& va);
2262 
2263   // FP fused multiply-subtract.
2264   void fmsub(const VRegister& vd,
2265              const VRegister& vn,
2266              const VRegister& vm,
2267              const VRegister& va);
2268 
2269   // FP fused multiply-add and negate.
2270   void fnmadd(const VRegister& vd,
2271               const VRegister& vn,
2272               const VRegister& vm,
2273               const VRegister& va);
2274 
2275   // FP fused multiply-subtract and negate.
2276   void fnmsub(const VRegister& vd,
2277               const VRegister& vn,
2278               const VRegister& vm,
2279               const VRegister& va);
2280 
2281   // FP multiply-negate scalar.
2282   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2283 
2284   // FP reciprocal exponent scalar.
2285   void frecpx(const VRegister& vd, const VRegister& vn);
2286 
2287   // FP divide.
2288   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2289 
2290   // FP maximum.
2291   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2292 
2293   // FP minimum.
2294   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2295 
2296   // FP maximum number.
2297   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2298 
2299   // FP minimum number.
2300   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2301 
  // FP absolute.
  void fabs(const VRegister& vd, const VRegister& vn);

  // FP negate.
  void fneg(const VRegister& vd, const VRegister& vn);

  // FP square root.
  void fsqrt(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding (rounding mode taken from the
  // FPCR).
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding (rounding mode taken from
  // the FPCR).
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
  void frint32x(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, towards zero [Armv8.5].
  void frint32z(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
  void frint64x(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, towards zero [Armv8.5].
  void frint64z(const VRegister& vd, const VRegister& vn);

  // Helper: emit an FP compare against an immediate; `trap` presumably
  // selects the signaling (fcmpe) form. NOTE(review): looks like the shared
  // implementation behind fcmp/fcmpe below - confirm against the definition.
  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  // Helper: emit an FP compare of two registers; `trap` presumably selects
  // the signaling (fcmpe) form.
  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  // Helper: emit an FP conditional compare; `trap` presumably selects the
  // signaling (fccmpe) form.
  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);

  // FP conditional compare.
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  void fcvtms(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  void fcvtmu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  void fcvtms(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  void fcvtmu(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const Register& rd, const VRegister& vn);

  // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
  void fjcvtzs(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to even.
  void fcvtnu(const Register& rd, const VRegister& vn);
2444 
2445   // FP convert to signed integer, nearest with ties to even.
2446   void fcvtns(const VRegister& rd, const VRegister& vn);
2447 
2448   // FP convert to unsigned integer, nearest with ties to even.
2449   void fcvtnu(const VRegister& rd, const VRegister& vn);
2450 
  // FP convert to signed integer or fixed-point, round towards zero.
  // `fbits` is the number of fractional bits in the fixed-point result;
  // the default of 0 produces a plain integer.
  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer or fixed-point, round towards zero.
  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const VRegister& vd, const VRegister& vn);
2474 
2475   // Convert signed integer or fixed point to FP.
2476   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2477 
2478   // Convert unsigned integer or fixed point to FP.
2479   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2480 
2481   // Convert signed integer or fixed-point to FP.
2482   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2483 
2484   // Convert unsigned integer or fixed-point to FP.
2485   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2486 
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving subtract.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving subtract.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements scalar.
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-subtract by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-subtract by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-subtract by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-subtract by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating doubling long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-subtract by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-subtract by element
  // (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2675 
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register (architectural alias of `orr vd, vn, vn`).
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise orn.
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise eor.
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar (architectural alias of scalar `dup`).
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element (architectural alias of
  // `ins`).
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element (architectural alias
  // of `ins`).
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register (architectural alias of
  // `umov`).
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2870 
  // NEON structure loads (ld1-ld4) and replicating loads (ld1r-ld4r).

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2948 
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3074 
3075   // One-element structure store from one register.
3076   void st1(const VRegister& vt, const MemOperand& src);
3077 
3078   // One-element structure store from two registers.
3079   void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3080 
3081   // One-element structure store from three registers.
3082   void st1(const VRegister& vt,
3083            const VRegister& vt2,
3084            const VRegister& vt3,
3085            const MemOperand& src);
3086 
3087   // One-element structure store from four registers.
3088   void st1(const VRegister& vt,
3089            const VRegister& vt2,
3090            const VRegister& vt3,
3091            const VRegister& vt4,
3092            const MemOperand& src);
3093 
3094   // One-element single structure store from one lane.
3095   void st1(const VRegister& vt, int lane, const MemOperand& src);
3096 
3097   // Two-element structure store from two registers.
3098   void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3099 
3100   // Two-element single structure store from two lanes.
3101   void st2(const VRegister& vt,
3102            const VRegister& vt2,
3103            int lane,
3104            const MemOperand& src);
3105 
3106   // Three-element structure store from three registers.
3107   void st3(const VRegister& vt,
3108            const VRegister& vt2,
3109            const VRegister& vt3,
3110            const MemOperand& src);
3111 
3112   // Three-element single structure store from three lanes.
3113   void st3(const VRegister& vt,
3114            const VRegister& vt2,
3115            const VRegister& vt3,
3116            int lane,
3117            const MemOperand& src);
3118 
3119   // Four-element structure store from four registers.
3120   void st4(const VRegister& vt,
3121            const VRegister& vt2,
3122            const VRegister& vt3,
3123            const VRegister& vt4,
3124            const MemOperand& src);
3125 
3126   // Four-element single structure store from four lanes.
3127   void st4(const VRegister& vt,
3128            const VRegister& vt2,
3129            const VRegister& vt3,
3130            const VRegister& vt4,
3131            int lane,
3132            const MemOperand& src);
3133 
3134   // Unsigned add long.
3135   void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3136 
3137   // Unsigned add long (second part).
3138   void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3139 
3140   // Unsigned add wide.
3141   void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3142 
3143   // Unsigned add wide (second part).
3144   void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3145 
3146   // Signed add long.
3147   void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3148 
3149   // Signed add long (second part).
3150   void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3151 
3152   // Signed add wide.
3153   void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3154 
3155   // Signed add wide (second part).
3156   void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3157 
3158   // Unsigned subtract long.
3159   void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3160 
3161   // Unsigned subtract long (second part).
3162   void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3163 
3164   // Unsigned subtract wide.
3165   void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3166 
3167   // Unsigned subtract wide (second part).
3168   void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3169 
3170   // Signed subtract long.
3171   void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3172 
3173   // Signed subtract long (second part).
3174   void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3175 
3176   // Signed integer subtract wide.
3177   void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3178 
3179   // Signed integer subtract wide (second part).
3180   void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3181 
3182   // Unsigned maximum.
3183   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3184 
3185   // Unsigned pairwise maximum.
3186   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3187 
3188   // Unsigned maximum across vector.
3189   void umaxv(const VRegister& vd, const VRegister& vn);
3190 
3191   // Unsigned minimum.
3192   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3193 
3194   // Unsigned pairwise minimum.
3195   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3196 
3197   // Unsigned minimum across vector.
3198   void uminv(const VRegister& vd, const VRegister& vn);
3199 
3200   // Transpose vectors (primary).
3201   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3202 
3203   // Transpose vectors (secondary).
3204   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3205 
3206   // Unzip vectors (primary).
3207   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3208 
3209   // Unzip vectors (secondary).
3210   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3211 
3212   // Zip vectors (primary).
3213   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3214 
3215   // Zip vectors (secondary).
3216   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3217 
3218   // Signed shift right by immediate.
3219   void sshr(const VRegister& vd, const VRegister& vn, int shift);
3220 
3221   // Unsigned shift right by immediate.
3222   void ushr(const VRegister& vd, const VRegister& vn, int shift);
3223 
3224   // Signed rounding shift right by immediate.
3225   void srshr(const VRegister& vd, const VRegister& vn, int shift);
3226 
3227   // Unsigned rounding shift right by immediate.
3228   void urshr(const VRegister& vd, const VRegister& vn, int shift);
3229 
3230   // Signed shift right by immediate and accumulate.
3231   void ssra(const VRegister& vd, const VRegister& vn, int shift);
3232 
3233   // Unsigned shift right by immediate and accumulate.
3234   void usra(const VRegister& vd, const VRegister& vn, int shift);
3235 
3236   // Signed rounding shift right by immediate and accumulate.
3237   void srsra(const VRegister& vd, const VRegister& vn, int shift);
3238 
3239   // Unsigned rounding shift right by immediate and accumulate.
3240   void ursra(const VRegister& vd, const VRegister& vn, int shift);
3241 
3242   // Shift right narrow by immediate.
3243   void shrn(const VRegister& vd, const VRegister& vn, int shift);
3244 
3245   // Shift right narrow by immediate (second part).
3246   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
3247 
3248   // Rounding shift right narrow by immediate.
3249   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
3250 
3251   // Rounding shift right narrow by immediate (second part).
3252   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
3253 
3254   // Unsigned saturating shift right narrow by immediate.
3255   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
3256 
3257   // Unsigned saturating shift right narrow by immediate (second part).
3258   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3259 
3260   // Unsigned saturating rounding shift right narrow by immediate.
3261   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3262 
3263   // Unsigned saturating rounding shift right narrow by immediate (second part).
3264   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3265 
3266   // Signed saturating shift right narrow by immediate.
3267   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
3268 
3269   // Signed saturating shift right narrow by immediate (second part).
3270   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3271 
3272   // Signed saturating rounded shift right narrow by immediate.
3273   void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3274 
3275   // Signed saturating rounded shift right narrow by immediate (second part).
3276   void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3277 
3278   // Signed saturating shift right unsigned narrow by immediate.
3279   void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
3280 
3281   // Signed saturating shift right unsigned narrow by immediate (second part).
3282   void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
3283 
3284   // Signed sat rounded shift right unsigned narrow by immediate.
3285   void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
3286 
3287   // Signed sat rounded shift right unsigned narrow by immediate (second part).
3288   void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3289 
3290   // FP reciprocal step.
3291   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3292 
3293   // FP reciprocal estimate.
3294   void frecpe(const VRegister& vd, const VRegister& vn);
3295 
3296   // FP reciprocal square root estimate.
3297   void frsqrte(const VRegister& vd, const VRegister& vn);
3298 
3299   // FP reciprocal square root step.
3300   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3301 
3302   // Signed absolute difference and accumulate long.
3303   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3304 
3305   // Signed absolute difference and accumulate long (second part).
3306   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3307 
3308   // Unsigned absolute difference and accumulate long.
3309   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3310 
3311   // Unsigned absolute difference and accumulate long (second part).
3312   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3313 
3314   // Signed absolute difference long.
3315   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3316 
3317   // Signed absolute difference long (second part).
3318   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3319 
3320   // Unsigned absolute difference long.
3321   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3322 
3323   // Unsigned absolute difference long (second part).
3324   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3325 
3326   // Polynomial multiply long.
3327   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3328 
3329   // Polynomial multiply long (second part).
3330   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3331 
3332   // Signed long multiply-add.
3333   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3334 
3335   // Signed long multiply-add (second part).
3336   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3337 
3338   // Unsigned long multiply-add.
3339   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3340 
3341   // Unsigned long multiply-add (second part).
3342   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3343 
3344   // Signed long multiply-sub.
3345   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3346 
3347   // Signed long multiply-sub (second part).
3348   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3349 
3350   // Unsigned long multiply-sub.
3351   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3352 
3353   // Unsigned long multiply-sub (second part).
3354   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3355 
3356   // Signed long multiply.
3357   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3358 
3359   // Signed long multiply (second part).
3360   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3361 
3362   // Signed saturating doubling long multiply-add.
3363   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3364 
3365   // Signed saturating doubling long multiply-add (second part).
3366   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3367 
3368   // Signed saturating doubling long multiply-subtract.
3369   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3370 
3371   // Signed saturating doubling long multiply-subtract (second part).
3372   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3373 
3374   // Signed saturating doubling long multiply.
3375   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3376 
3377   // Signed saturating doubling long multiply (second part).
3378   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3379 
3380   // Signed saturating doubling multiply returning high half.
3381   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3382 
3383   // Signed saturating rounding doubling multiply returning high half.
3384   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3385 
3386   // Signed dot product [Armv8.2].
3387   void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3388 
3389   // Signed saturating rounding doubling multiply accumulate returning high
3390   // half [Armv8.1].
3391   void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3392 
3393   // Unsigned dot product [Armv8.2].
3394   void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3395 
  // Dot product with unsigned and signed integers (vector).
  void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3398 
3399   // Dot product with signed and unsigned integers (vector, by element).
3400   void sudot(const VRegister& vd,
3401              const VRegister& vn,
3402              const VRegister& vm,
3403              int vm_index);
3404 
3405   // Dot product with unsigned and signed integers (vector, by element).
3406   void usdot(const VRegister& vd,
3407              const VRegister& vn,
3408              const VRegister& vm,
3409              int vm_index);
3410 
3411   // Signed saturating rounding doubling multiply subtract returning high half
3412   // [Armv8.1].
3413   void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3414 
3415   // Signed saturating doubling multiply element returning high half.
3416   void sqdmulh(const VRegister& vd,
3417                const VRegister& vn,
3418                const VRegister& vm,
3419                int vm_index);
3420 
3421   // Signed saturating rounding doubling multiply element returning high half.
3422   void sqrdmulh(const VRegister& vd,
3423                 const VRegister& vn,
3424                 const VRegister& vm,
3425                 int vm_index);
3426 
3427   // Signed dot product by element [Armv8.2].
3428   void sdot(const VRegister& vd,
3429             const VRegister& vn,
3430             const VRegister& vm,
3431             int vm_index);
3432 
3433   // Signed saturating rounding doubling multiply accumulate element returning
3434   // high half [Armv8.1].
3435   void sqrdmlah(const VRegister& vd,
3436                 const VRegister& vn,
3437                 const VRegister& vm,
3438                 int vm_index);
3439 
3440   // Unsigned dot product by element [Armv8.2].
3441   void udot(const VRegister& vd,
3442             const VRegister& vn,
3443             const VRegister& vm,
3444             int vm_index);
3445 
3446   // Signed saturating rounding doubling multiply subtract element returning
3447   // high half [Armv8.1].
3448   void sqrdmlsh(const VRegister& vd,
3449                 const VRegister& vn,
3450                 const VRegister& vm,
3451                 int vm_index);
3452 
  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3455 
3456   // Unsigned long multiply (second part).
3457   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3458 
3459   // Add narrow returning high half.
3460   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3461 
3462   // Add narrow returning high half (second part).
3463   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3464 
3465   // Rounding add narrow returning high half.
3466   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3467 
3468   // Rounding add narrow returning high half (second part).
3469   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3470 
3471   // Subtract narrow returning high half.
3472   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3473 
3474   // Subtract narrow returning high half (second part).
3475   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3476 
3477   // Rounding subtract narrow returning high half.
3478   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3479 
3480   // Rounding subtract narrow returning high half (second part).
3481   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3482 
3483   // FP vector multiply accumulate.
3484   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3485 
3486   // FP fused multiply-add long to accumulator.
3487   void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3488 
3489   // FP fused multiply-add long to accumulator (second part).
3490   void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3491 
3492   // FP fused multiply-add long to accumulator by element.
3493   void fmlal(const VRegister& vd,
3494              const VRegister& vn,
3495              const VRegister& vm,
3496              int vm_index);
3497 
3498   // FP fused multiply-add long to accumulator by element (second part).
3499   void fmlal2(const VRegister& vd,
3500               const VRegister& vn,
3501               const VRegister& vm,
3502               int vm_index);
3503 
3504   // FP vector multiply subtract.
3505   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3506 
3507   // FP fused multiply-subtract long to accumulator.
3508   void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3509 
3510   // FP fused multiply-subtract long to accumulator (second part).
3511   void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3512 
3513   // FP fused multiply-subtract long to accumulator by element.
3514   void fmlsl(const VRegister& vd,
3515              const VRegister& vn,
3516              const VRegister& vm,
3517              int vm_index);
3518 
3519   // FP fused multiply-subtract long to accumulator by element (second part).
3520   void fmlsl2(const VRegister& vd,
3521               const VRegister& vn,
3522               const VRegister& vm,
3523               int vm_index);
3524 
3525   // FP vector multiply extended.
3526   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3527 
3528   // FP absolute greater than or equal.
3529   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3530 
3531   // FP absolute greater than.
3532   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3533 
3534   // FP multiply by element.
3535   void fmul(const VRegister& vd,
3536             const VRegister& vn,
3537             const VRegister& vm,
3538             int vm_index);
3539 
3540   // FP fused multiply-add to accumulator by element.
3541   void fmla(const VRegister& vd,
3542             const VRegister& vn,
3543             const VRegister& vm,
3544             int vm_index);
3545 
3546   // FP fused multiply-sub from accumulator by element.
3547   void fmls(const VRegister& vd,
3548             const VRegister& vn,
3549             const VRegister& vm,
3550             int vm_index);
3551 
3552   // FP multiply extended by element.
3553   void fmulx(const VRegister& vd,
3554              const VRegister& vn,
3555              const VRegister& vm,
3556              int vm_index);
3557 
3558   // FP compare equal.
3559   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3560 
3561   // FP greater than.
3562   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3563 
3564   // FP greater than or equal.
3565   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3566 
3567   // FP compare equal to zero.
3568   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3569 
3570   // FP greater than zero.
3571   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3572 
3573   // FP greater than or equal to zero.
3574   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3575 
3576   // FP less than or equal to zero.
3577   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3578 
  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3581 
3582   // FP absolute difference.
3583   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3584 
3585   // FP pairwise add vector.
3586   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3587 
3588   // FP pairwise add scalar.
3589   void faddp(const VRegister& vd, const VRegister& vn);
3590 
3591   // FP pairwise maximum vector.
3592   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3593 
3594   // FP pairwise maximum scalar.
3595   void fmaxp(const VRegister& vd, const VRegister& vn);
3596 
3597   // FP pairwise minimum vector.
3598   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3599 
3600   // FP pairwise minimum scalar.
3601   void fminp(const VRegister& vd, const VRegister& vn);
3602 
3603   // FP pairwise maximum number vector.
3604   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3605 
3606   // FP pairwise maximum number scalar.
3607   void fmaxnmp(const VRegister& vd, const VRegister& vn);
3608 
3609   // FP pairwise minimum number vector.
3610   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3611 
3612   // FP pairwise minimum number scalar.
3613   void fminnmp(const VRegister& vd, const VRegister& vn);
3614 
3615   // v8.3 complex numbers - note that these are only partial/helper functions
3616   // and must be used in series in order to perform full CN operations.
3617 
3618   // FP complex multiply accumulate (by element) [Armv8.3].
3619   void fcmla(const VRegister& vd,
3620              const VRegister& vn,
3621              const VRegister& vm,
3622              int vm_index,
3623              int rot);
3624 
3625   // FP complex multiply accumulate [Armv8.3].
3626   void fcmla(const VRegister& vd,
3627              const VRegister& vn,
3628              const VRegister& vm,
3629              int rot);
3630 
3631   // FP complex add [Armv8.3].
3632   void fcadd(const VRegister& vd,
3633              const VRegister& vn,
3634              const VRegister& vm,
3635              int rot);
3636 
3637   // Signed 8-bit integer matrix multiply-accumulate (vector).
3638   void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3639 
3640   // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3641   void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3642 
3643   // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3644   void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3645 
3646   // Scalable Vector Extensions.
3647 
3648   // Absolute value (predicated).
3649   void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3650 
3651   // Add vectors (predicated).
3652   void add(const ZRegister& zd,
3653            const PRegisterM& pg,
3654            const ZRegister& zn,
3655            const ZRegister& zm);
3656 
3657   // Add vectors (unpredicated).
3658   void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3659 
3660   // Add immediate (unpredicated).
3661   void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3662 
3663   // Add multiple of predicate register size to scalar register.
3664   void addpl(const Register& xd, const Register& xn, int imm6);
3665 
3666   // Add multiple of vector register size to scalar register.
3667   void addvl(const Register& xd, const Register& xn, int imm6);
3668 
3669   // Compute vector address.
3670   void adr(const ZRegister& zd, const SVEMemOperand& addr);
3671 
3672   // Bitwise AND predicates.
3673   void and_(const PRegisterWithLaneSize& pd,
3674             const PRegisterZ& pg,
3675             const PRegisterWithLaneSize& pn,
3676             const PRegisterWithLaneSize& pm);
3677 
3678   // Bitwise AND vectors (predicated).
3679   void and_(const ZRegister& zd,
3680             const PRegisterM& pg,
3681             const ZRegister& zn,
3682             const ZRegister& zm);
3683 
3684   // Bitwise AND with immediate (unpredicated).
3685   void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3686 
3687   // Bitwise AND vectors (unpredicated).
3688   void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3689 
  // Bitwise AND predicates, setting the condition flags.
  void ands(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);
3695 
3696   // Bitwise AND reduction to scalar.
3697   void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3698 
3699   // Arithmetic shift right by immediate (predicated).
3700   void asr(const ZRegister& zd,
3701            const PRegisterM& pg,
3702            const ZRegister& zn,
3703            int shift);
3704 
3705   // Arithmetic shift right by 64-bit wide elements (predicated).
3706   void asr(const ZRegister& zd,
3707            const PRegisterM& pg,
3708            const ZRegister& zn,
3709            const ZRegister& zm);
3710 
3711   // Arithmetic shift right by immediate (unpredicated).
3712   void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3713 
3714   // Arithmetic shift right by 64-bit wide elements (unpredicated).
3715   void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3716 
3717   // Arithmetic shift right for divide by immediate (predicated).
3718   void asrd(const ZRegister& zd,
3719             const PRegisterM& pg,
3720             const ZRegister& zn,
3721             int shift);
3722 
3723   // Reversed arithmetic shift right by vector (predicated).
3724   void asrr(const ZRegister& zd,
3725             const PRegisterM& pg,
3726             const ZRegister& zn,
3727             const ZRegister& zm);
3728 
3729   // Bitwise clear predicates.
3730   void bic(const PRegisterWithLaneSize& pd,
3731            const PRegisterZ& pg,
3732            const PRegisterWithLaneSize& pn,
3733            const PRegisterWithLaneSize& pm);
3734 
3735   // Bitwise clear vectors (predicated).
3736   void bic(const ZRegister& zd,
3737            const PRegisterM& pg,
3738            const ZRegister& zn,
3739            const ZRegister& zm);
3740 
3741   // Bitwise clear bits using immediate (unpredicated).
3742   void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3743 
3744   // Bitwise clear vectors (unpredicated).
3745   void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3746 
  // Bitwise clear predicates, setting the condition flags.
  void bics(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);
3752 
3753   // Break after first true condition.
3754   void brka(const PRegisterWithLaneSize& pd,
3755             const PRegister& pg,
3756             const PRegisterWithLaneSize& pn);
3757 
  // Break after first true condition, setting the condition flags.
  void brkas(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);
3762 
3763   // Break before first true condition.
3764   void brkb(const PRegisterWithLaneSize& pd,
3765             const PRegister& pg,
3766             const PRegisterWithLaneSize& pn);
3767 
  // Break before first true condition, setting the condition flags.
  void brkbs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);
3772 
3773   // Propagate break to next partition.
3774   void brkn(const PRegisterWithLaneSize& pd,
3775             const PRegisterZ& pg,
3776             const PRegisterWithLaneSize& pn,
3777             const PRegisterWithLaneSize& pm);
3778 
  // Propagate break to next partition, setting the condition flags.
  void brkns(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);
3784 
3785   // Break after first true condition, propagating from previous partition.
3786   void brkpa(const PRegisterWithLaneSize& pd,
3787              const PRegisterZ& pg,
3788              const PRegisterWithLaneSize& pn,
3789              const PRegisterWithLaneSize& pm);
3790 
  // Break after first true condition, propagating from previous partition
  // and setting the condition flags.
  void brkpas(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);
3796 
3797   // Break before first true condition, propagating from previous partition.
3798   void brkpb(const PRegisterWithLaneSize& pd,
3799              const PRegisterZ& pg,
3800              const PRegisterWithLaneSize& pn,
3801              const PRegisterWithLaneSize& pm);
3802 
  // Break before first true condition, propagating from previous partition
  // and setting the condition flags.
  void brkpbs(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);
3808 
3809   // Conditionally extract element after last to general-purpose register.
3810   void clasta(const Register& rd,
3811               const PRegister& pg,
3812               const Register& rn,
3813               const ZRegister& zm);
3814 
3815   // Conditionally extract element after last to SIMD&FP scalar register.
3816   void clasta(const VRegister& vd,
3817               const PRegister& pg,
3818               const VRegister& vn,
3819               const ZRegister& zm);
3820 
3821   // Conditionally extract element after last to vector register.
3822   void clasta(const ZRegister& zd,
3823               const PRegister& pg,
3824               const ZRegister& zn,
3825               const ZRegister& zm);
3826 
3827   // Conditionally extract last element to general-purpose register.
3828   void clastb(const Register& rd,
3829               const PRegister& pg,
3830               const Register& rn,
3831               const ZRegister& zm);
3832 
3833   // Conditionally extract last element to SIMD&FP scalar register.
3834   void clastb(const VRegister& vd,
3835               const PRegister& pg,
3836               const VRegister& vn,
3837               const ZRegister& zm);
3838 
3839   // Conditionally extract last element to vector register.
3840   void clastb(const ZRegister& zd,
3841               const PRegister& pg,
3842               const ZRegister& zn,
3843               const ZRegister& zm);
3844 
3845   // Count leading sign bits (predicated).
3846   void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3847 
3848   // Count leading zero bits (predicated).
3849   void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3850 
  // Compare vectors (predicated), with the comparison chosen by `cond`.
  // NOTE(review): presumably a convenience alias that dispatches to the
  // specific cmp<cond> forms declared below — confirm against the
  // implementation.
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);
3856 
3857   // Compare vector to 64-bit wide elements.
3858   void cmpeq(const PRegisterWithLaneSize& pd,
3859              const PRegisterZ& pg,
3860              const ZRegister& zn,
3861              const ZRegister& zm);
3862 
3863   // Compare vector to immediate.
3864   void cmpeq(const PRegisterWithLaneSize& pd,
3865              const PRegisterZ& pg,
3866              const ZRegister& zn,
3867              int imm5);
3868 
3869   // Compare vector to 64-bit wide elements.
3870   void cmpge(const PRegisterWithLaneSize& pd,
3871              const PRegisterZ& pg,
3872              const ZRegister& zn,
3873              const ZRegister& zm);
3874 
3875   // Compare vector to immediate.
3876   void cmpge(const PRegisterWithLaneSize& pd,
3877              const PRegisterZ& pg,
3878              const ZRegister& zn,
3879              int imm5);
3880 
3881   // Compare vector to 64-bit wide elements.
3882   void cmpgt(const PRegisterWithLaneSize& pd,
3883              const PRegisterZ& pg,
3884              const ZRegister& zn,
3885              const ZRegister& zm);
3886 
3887   // Compare vector to immediate.
3888   void cmpgt(const PRegisterWithLaneSize& pd,
3889              const PRegisterZ& pg,
3890              const ZRegister& zn,
3891              int imm5);
3892 
3893   // Compare vector to 64-bit wide elements.
3894   void cmphi(const PRegisterWithLaneSize& pd,
3895              const PRegisterZ& pg,
3896              const ZRegister& zn,
3897              const ZRegister& zm);
3898 
3899   // Compare vector to immediate.
3900   void cmphi(const PRegisterWithLaneSize& pd,
3901              const PRegisterZ& pg,
3902              const ZRegister& zn,
3903              unsigned imm7);
3904 
3905   // Compare vector to 64-bit wide elements.
3906   void cmphs(const PRegisterWithLaneSize& pd,
3907              const PRegisterZ& pg,
3908              const ZRegister& zn,
3909              const ZRegister& zm);
3910 
3911   // Compare vector to immediate.
3912   void cmphs(const PRegisterWithLaneSize& pd,
3913              const PRegisterZ& pg,
3914              const ZRegister& zn,
3915              unsigned imm7);
3916 
3917   // Compare vector to 64-bit wide elements.
3918   void cmple(const PRegisterWithLaneSize& pd,
3919              const PRegisterZ& pg,
3920              const ZRegister& zn,
3921              const ZRegister& zm);
3922 
3923   // Compare vector to immediate.
3924   void cmple(const PRegisterWithLaneSize& pd,
3925              const PRegisterZ& pg,
3926              const ZRegister& zn,
3927              int imm5);
3928 
3929   // Compare vector to 64-bit wide elements.
3930   void cmplo(const PRegisterWithLaneSize& pd,
3931              const PRegisterZ& pg,
3932              const ZRegister& zn,
3933              const ZRegister& zm);
3934 
3935   // Compare vector to immediate.
3936   void cmplo(const PRegisterWithLaneSize& pd,
3937              const PRegisterZ& pg,
3938              const ZRegister& zn,
3939              unsigned imm7);
3940 
3941   // Compare vector to 64-bit wide elements.
3942   void cmpls(const PRegisterWithLaneSize& pd,
3943              const PRegisterZ& pg,
3944              const ZRegister& zn,
3945              const ZRegister& zm);
3946 
3947   // Compare vector to immediate.
3948   void cmpls(const PRegisterWithLaneSize& pd,
3949              const PRegisterZ& pg,
3950              const ZRegister& zn,
3951              unsigned imm7);
3952 
3953   // Compare vector to 64-bit wide elements.
3954   void cmplt(const PRegisterWithLaneSize& pd,
3955              const PRegisterZ& pg,
3956              const ZRegister& zn,
3957              const ZRegister& zm);
3958 
3959   // Compare vector to immediate.
3960   void cmplt(const PRegisterWithLaneSize& pd,
3961              const PRegisterZ& pg,
3962              const ZRegister& zn,
3963              int imm5);
3964 
3965   // Compare vector to 64-bit wide elements.
3966   void cmpne(const PRegisterWithLaneSize& pd,
3967              const PRegisterZ& pg,
3968              const ZRegister& zn,
3969              const ZRegister& zm);
3970 
3971   // Compare vector to immediate.
3972   void cmpne(const PRegisterWithLaneSize& pd,
3973              const PRegisterZ& pg,
3974              const ZRegister& zn,
3975              int imm5);
3976 
3977   // Logically invert boolean condition in vector (predicated).
3978   void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3979 
3980   // Count non-zero bits (predicated).
3981   void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3982 
3983   // Set scalar to multiple of predicate constraint element count.
3984   void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3985 
3986   // Set scalar to multiple of predicate constraint element count.
3987   void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3988 
3989   // Set scalar to multiple of predicate constraint element count.
3990   void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3991 
3992   // Set scalar to active predicate element count.
3993   void cntp(const Register& xd,
3994             const PRegister& pg,
3995             const PRegisterWithLaneSize& pn);
3996 
3997   // Set scalar to multiple of predicate constraint element count.
3998   void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3999 
4000   // Shuffle active elements of vector to the right and fill with zero.
4001   void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
4002 
4003   // Copy signed integer immediate to vector elements (predicated).
4004   void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4005 
4006   // Copy general-purpose register to vector elements (predicated).
4007   void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4008 
4009   // Copy SIMD&FP scalar register to vector elements (predicated).
4010   void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4011 
4012   // Compare and terminate loop.
4013   void ctermeq(const Register& rn, const Register& rm);
4014 
4015   // Compare and terminate loop.
4016   void ctermne(const Register& rn, const Register& rm);
4017 
4018   // Decrement scalar by multiple of predicate constraint element count.
4019   void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4020 
4021   // Decrement scalar by multiple of predicate constraint element count.
4022   void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4023 
4024   // Decrement vector by multiple of predicate constraint element count.
4025   void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4026 
4027   // Decrement scalar by multiple of predicate constraint element count.
4028   void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4029 
4030   // Decrement vector by multiple of predicate constraint element count.
4031   void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4032 
4033   // Decrement scalar by active predicate element count.
4034   void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4035 
4036   // Decrement vector by active predicate element count.
4037   void decp(const ZRegister& zdn, const PRegister& pg);
4038 
4039   // Decrement scalar by multiple of predicate constraint element count.
4040   void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4041 
4042   // Decrement vector by multiple of predicate constraint element count.
4043   void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4044 
4045   // Broadcast general-purpose register to vector elements (unpredicated).
4046   void dup(const ZRegister& zd, const Register& xn);
4047 
4048   // Broadcast indexed element to vector (unpredicated).
4049   void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4050 
4051   // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4052   // assembler will pick an appropriate immediate and left shift that is
4053   // equivalent to the immediate argument. If an explicit left shift is
4054   // specified (0 or 8), the immediate must be a signed 8-bit integer.
4055 
4056   // Broadcast signed immediate to vector elements (unpredicated).
4057   void dup(const ZRegister& zd, int imm8, int shift = -1);
4058 
4059   // Broadcast logical bitmask immediate to vector (unpredicated).
4060   void dupm(const ZRegister& zd, uint64_t imm);
4061 
4062   // Bitwise exclusive OR with inverted immediate (unpredicated).
4063   void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4064 
4065   // Bitwise exclusive OR predicates.
4066   void eor(const PRegisterWithLaneSize& pd,
4067            const PRegisterZ& pg,
4068            const PRegisterWithLaneSize& pn,
4069            const PRegisterWithLaneSize& pm);
4070 
4071   // Bitwise exclusive OR vectors (predicated).
4072   void eor(const ZRegister& zd,
4073            const PRegisterM& pg,
4074            const ZRegister& zn,
4075            const ZRegister& zm);
4076 
4077   // Bitwise exclusive OR with immediate (unpredicated).
4078   void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4079 
4080   // Bitwise exclusive OR vectors (unpredicated).
4081   void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4082 
4083   // Bitwise exclusive OR predicates.
4084   void eors(const PRegisterWithLaneSize& pd,
4085             const PRegisterZ& pg,
4086             const PRegisterWithLaneSize& pn,
4087             const PRegisterWithLaneSize& pm);
4088 
4089   // Bitwise XOR reduction to scalar.
4090   void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4091 
4092   // Extract vector from pair of vectors.
4093   void ext(const ZRegister& zd,
4094            const ZRegister& zn,
4095            const ZRegister& zm,
4096            unsigned offset);
4097 
4098   // Floating-point absolute difference (predicated).
4099   void fabd(const ZRegister& zd,
4100             const PRegisterM& pg,
4101             const ZRegister& zn,
4102             const ZRegister& zm);
4103 
4104   // Floating-point absolute value (predicated).
4105   void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4106 
4107   // Floating-point absolute compare vectors.
4108   void facge(const PRegisterWithLaneSize& pd,
4109              const PRegisterZ& pg,
4110              const ZRegister& zn,
4111              const ZRegister& zm);
4112 
4113   // Floating-point absolute compare vectors.
4114   void facgt(const PRegisterWithLaneSize& pd,
4115              const PRegisterZ& pg,
4116              const ZRegister& zn,
4117              const ZRegister& zm);
4118 
4119   // Floating-point add immediate (predicated).
4120   void fadd(const ZRegister& zd,
4121             const PRegisterM& pg,
4122             const ZRegister& zn,
4123             double imm);
4124 
4125   // Floating-point add vector (predicated).
4126   void fadd(const ZRegister& zd,
4127             const PRegisterM& pg,
4128             const ZRegister& zn,
4129             const ZRegister& zm);
4130 
4131   // Floating-point add vector (unpredicated).
4132   void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4133 
4134   // Floating-point add strictly-ordered reduction, accumulating in scalar.
4135   void fadda(const VRegister& vd,
4136              const PRegister& pg,
4137              const VRegister& vn,
4138              const ZRegister& zm);
4139 
4140   // Floating-point add recursive reduction to scalar.
4141   void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4142 
4143   // Floating-point complex add with rotate (predicated).
4144   void fcadd(const ZRegister& zd,
4145              const PRegisterM& pg,
4146              const ZRegister& zn,
4147              const ZRegister& zm,
4148              int rot);
4149 
4150   // Floating-point compare vector with zero.
4151   void fcmeq(const PRegisterWithLaneSize& pd,
4152              const PRegisterZ& pg,
4153              const ZRegister& zn,
4154              double zero);
4155 
4156   // Floating-point compare vectors.
4157   void fcmeq(const PRegisterWithLaneSize& pd,
4158              const PRegisterZ& pg,
4159              const ZRegister& zn,
4160              const ZRegister& zm);
4161 
4162   // Floating-point compare vector with zero.
4163   void fcmge(const PRegisterWithLaneSize& pd,
4164              const PRegisterZ& pg,
4165              const ZRegister& zn,
4166              double zero);
4167 
4168   // Floating-point compare vectors.
4169   void fcmge(const PRegisterWithLaneSize& pd,
4170              const PRegisterZ& pg,
4171              const ZRegister& zn,
4172              const ZRegister& zm);
4173 
4174   // Floating-point compare vector with zero.
4175   void fcmgt(const PRegisterWithLaneSize& pd,
4176              const PRegisterZ& pg,
4177              const ZRegister& zn,
4178              double zero);
4179 
4180   // Floating-point compare vectors.
4181   void fcmgt(const PRegisterWithLaneSize& pd,
4182              const PRegisterZ& pg,
4183              const ZRegister& zn,
4184              const ZRegister& zm);
4185 
4186   // Floating-point complex multiply-add with rotate (predicated).
4187   void fcmla(const ZRegister& zda,
4188              const PRegisterM& pg,
4189              const ZRegister& zn,
4190              const ZRegister& zm,
4191              int rot);
4192 
4193   // Floating-point complex multiply-add by indexed values with rotate.
4194   void fcmla(const ZRegister& zda,
4195              const ZRegister& zn,
4196              const ZRegister& zm,
4197              int index,
4198              int rot);
4199 
4200   // Floating-point compare vector with zero.
4201   void fcmle(const PRegisterWithLaneSize& pd,
4202              const PRegisterZ& pg,
4203              const ZRegister& zn,
4204              double zero);
4205 
4206   // Floating-point compare vector with zero.
4207   void fcmlt(const PRegisterWithLaneSize& pd,
4208              const PRegisterZ& pg,
4209              const ZRegister& zn,
4210              double zero);
4211 
4212   // Floating-point compare vector with zero.
4213   void fcmne(const PRegisterWithLaneSize& pd,
4214              const PRegisterZ& pg,
4215              const ZRegister& zn,
4216              double zero);
4217 
4218   // Floating-point compare vectors.
4219   void fcmne(const PRegisterWithLaneSize& pd,
4220              const PRegisterZ& pg,
4221              const ZRegister& zn,
4222              const ZRegister& zm);
4223 
4224   // Floating-point compare vectors.
4225   void fcmuo(const PRegisterWithLaneSize& pd,
4226              const PRegisterZ& pg,
4227              const ZRegister& zn,
4228              const ZRegister& zm);
4229 
4230   // Copy floating-point immediate to vector elements (predicated).
4231   void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4232 
4233   // Copy half-precision floating-point immediate to vector elements
4234   // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4235   void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4236     fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4237   }
4238 
4239   // Floating-point convert precision (predicated).
4240   void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4241 
4242   // Floating-point convert to signed integer, rounding toward zero
4243   // (predicated).
4244   void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4245 
4246   // Floating-point convert to unsigned integer, rounding toward zero
4247   // (predicated).
4248   void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4249 
4250   // Floating-point divide by vector (predicated).
4251   void fdiv(const ZRegister& zd,
4252             const PRegisterM& pg,
4253             const ZRegister& zn,
4254             const ZRegister& zm);
4255 
4256   // Floating-point reversed divide by vector (predicated).
4257   void fdivr(const ZRegister& zd,
4258              const PRegisterM& pg,
4259              const ZRegister& zn,
4260              const ZRegister& zm);
4261 
4262   // Broadcast floating-point immediate to vector elements.
4263   void fdup(const ZRegister& zd, double imm);
4264 
4265   // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4266   void fdup(const ZRegister& zd, Float16 imm) {
4267     fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4268   }
4269 
4270   // Floating-point exponential accelerator.
4271   void fexpa(const ZRegister& zd, const ZRegister& zn);
4272 
4273   // Floating-point fused multiply-add vectors (predicated), writing
4274   // multiplicand [Zdn = Za + Zdn * Zm].
4275   void fmad(const ZRegister& zdn,
4276             const PRegisterM& pg,
4277             const ZRegister& zm,
4278             const ZRegister& za);
4279 
4280   // Floating-point maximum with immediate (predicated).
4281   void fmax(const ZRegister& zd,
4282             const PRegisterM& pg,
4283             const ZRegister& zn,
4284             double imm);
4285 
4286   // Floating-point maximum (predicated).
4287   void fmax(const ZRegister& zd,
4288             const PRegisterM& pg,
4289             const ZRegister& zn,
4290             const ZRegister& zm);
4291 
4292   // Floating-point maximum number with immediate (predicated).
4293   void fmaxnm(const ZRegister& zd,
4294               const PRegisterM& pg,
4295               const ZRegister& zn,
4296               double imm);
4297 
4298   // Floating-point maximum number (predicated).
4299   void fmaxnm(const ZRegister& zd,
4300               const PRegisterM& pg,
4301               const ZRegister& zn,
4302               const ZRegister& zm);
4303 
4304   // Floating-point maximum number recursive reduction to scalar.
4305   void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4306 
4307   // Floating-point maximum recursive reduction to scalar.
4308   void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4309 
4310   // Floating-point minimum with immediate (predicated).
4311   void fmin(const ZRegister& zd,
4312             const PRegisterM& pg,
4313             const ZRegister& zn,
4314             double imm);
4315 
4316   // Floating-point minimum (predicated).
4317   void fmin(const ZRegister& zd,
4318             const PRegisterM& pg,
4319             const ZRegister& zn,
4320             const ZRegister& zm);
4321 
4322   // Floating-point minimum number with immediate (predicated).
4323   void fminnm(const ZRegister& zd,
4324               const PRegisterM& pg,
4325               const ZRegister& zn,
4326               double imm);
4327 
4328   // Floating-point minimum number (predicated).
4329   void fminnm(const ZRegister& zd,
4330               const PRegisterM& pg,
4331               const ZRegister& zn,
4332               const ZRegister& zm);
4333 
4334   // Floating-point minimum number recursive reduction to scalar.
4335   void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4336 
4337   // Floating-point minimum recursive reduction to scalar.
4338   void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4339 
4340   // Floating-point fused multiply-add vectors (predicated), writing addend
4341   // [Zda = Zda + Zn * Zm].
4342   void fmla(const ZRegister& zda,
4343             const PRegisterM& pg,
4344             const ZRegister& zn,
4345             const ZRegister& zm);
4346 
4347   // Floating-point fused multiply-add by indexed elements
4348   // (Zda = Zda + Zn * Zm[indexed]).
4349   void fmla(const ZRegister& zda,
4350             const ZRegister& zn,
4351             const ZRegister& zm,
4352             int index);
4353 
4354   // Floating-point fused multiply-subtract vectors (predicated), writing
4355   // addend [Zda = Zda + -Zn * Zm].
4356   void fmls(const ZRegister& zda,
4357             const PRegisterM& pg,
4358             const ZRegister& zn,
4359             const ZRegister& zm);
4360 
4361   // Floating-point fused multiply-subtract by indexed elements
4362   // (Zda = Zda + -Zn * Zm[indexed]).
4363   void fmls(const ZRegister& zda,
4364             const ZRegister& zn,
4365             const ZRegister& zm,
4366             int index);
4367 
4368   // Move 8-bit floating-point immediate to vector elements (unpredicated).
4369   void fmov(const ZRegister& zd, double imm);
4370 
4371   // Move 8-bit floating-point immediate to vector elements (predicated).
4372   void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4373 
4374   // Floating-point fused multiply-subtract vectors (predicated), writing
4375   // multiplicand [Zdn = Za + -Zdn * Zm].
4376   void fmsb(const ZRegister& zdn,
4377             const PRegisterM& pg,
4378             const ZRegister& zm,
4379             const ZRegister& za);
4380 
4381   // Floating-point multiply by immediate (predicated).
4382   void fmul(const ZRegister& zd,
4383             const PRegisterM& pg,
4384             const ZRegister& zn,
4385             double imm);
4386 
4387   // Floating-point multiply vectors (predicated).
4388   void fmul(const ZRegister& zd,
4389             const PRegisterM& pg,
4390             const ZRegister& zn,
4391             const ZRegister& zm);
4392 
4393   // Floating-point multiply by indexed elements.
4394   void fmul(const ZRegister& zd,
4395             const ZRegister& zn,
4396             const ZRegister& zm,
4397             unsigned index);
4398 
4399   // Floating-point multiply vectors (unpredicated).
4400   void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4401 
4402   // Floating-point multiply-extended vectors (predicated).
4403   void fmulx(const ZRegister& zd,
4404              const PRegisterM& pg,
4405              const ZRegister& zn,
4406              const ZRegister& zm);
4407 
4408   // Floating-point negate (predicated).
4409   void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4410 
4411   // Floating-point negated fused multiply-add vectors (predicated), writing
4412   // multiplicand [Zdn = -Za + -Zdn * Zm].
4413   void fnmad(const ZRegister& zdn,
4414              const PRegisterM& pg,
4415              const ZRegister& zm,
4416              const ZRegister& za);
4417 
4418   // Floating-point negated fused multiply-add vectors (predicated), writing
4419   // addend [Zda = -Zda + -Zn * Zm].
4420   void fnmla(const ZRegister& zda,
4421              const PRegisterM& pg,
4422              const ZRegister& zn,
4423              const ZRegister& zm);
4424 
4425   // Floating-point negated fused multiply-subtract vectors (predicated),
4426   // writing addend [Zda = -Zda + Zn * Zm].
4427   void fnmls(const ZRegister& zda,
4428              const PRegisterM& pg,
4429              const ZRegister& zn,
4430              const ZRegister& zm);
4431 
4432   // Floating-point negated fused multiply-subtract vectors (predicated),
4433   // writing multiplicand [Zdn = -Za + Zdn * Zm].
4434   void fnmsb(const ZRegister& zdn,
4435              const PRegisterM& pg,
4436              const ZRegister& zm,
4437              const ZRegister& za);
4438 
4439   // Floating-point reciprocal estimate (unpredicated).
4440   void frecpe(const ZRegister& zd, const ZRegister& zn);
4441 
4442   // Floating-point reciprocal step (unpredicated).
4443   void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4444 
4445   // Floating-point reciprocal exponent (predicated).
4446   void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4447 
4448   // Floating-point round to integral value (predicated).
4449   void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4450 
4451   // Floating-point round to integral value (predicated).
4452   void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4453 
4454   // Floating-point round to integral value (predicated).
4455   void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4456 
4457   // Floating-point round to integral value (predicated).
4458   void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4459 
4460   // Floating-point round to integral value (predicated).
4461   void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4462 
4463   // Floating-point round to integral value (predicated).
4464   void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4465 
4466   // Floating-point round to integral value (predicated).
4467   void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4468 
4469   // Floating-point reciprocal square root estimate (unpredicated).
4470   void frsqrte(const ZRegister& zd, const ZRegister& zn);
4471 
4472   // Floating-point reciprocal square root step (unpredicated).
4473   void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4474 
4475   // Floating-point adjust exponent by vector (predicated).
4476   void fscale(const ZRegister& zd,
4477               const PRegisterM& pg,
4478               const ZRegister& zn,
4479               const ZRegister& zm);
4480 
4481   // Floating-point square root (predicated).
4482   void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4483 
4484   // Floating-point subtract immediate (predicated).
4485   void fsub(const ZRegister& zd,
4486             const PRegisterM& pg,
4487             const ZRegister& zn,
4488             double imm);
4489 
4490   // Floating-point subtract vectors (predicated).
4491   void fsub(const ZRegister& zd,
4492             const PRegisterM& pg,
4493             const ZRegister& zn,
4494             const ZRegister& zm);
4495 
4496   // Floating-point subtract vectors (unpredicated).
4497   void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4498 
4499   // Floating-point reversed subtract from immediate (predicated).
4500   void fsubr(const ZRegister& zd,
4501              const PRegisterM& pg,
4502              const ZRegister& zn,
4503              double imm);
4504 
4505   // Floating-point reversed subtract vectors (predicated).
4506   void fsubr(const ZRegister& zd,
4507              const PRegisterM& pg,
4508              const ZRegister& zn,
4509              const ZRegister& zm);
4510 
4511   // Floating-point trigonometric multiply-add coefficient.
4512   void ftmad(const ZRegister& zd,
4513              const ZRegister& zn,
4514              const ZRegister& zm,
4515              int imm3);
4516 
4517   // Floating-point trigonometric starting value.
4518   void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4519 
4520   // Floating-point trigonometric select coefficient.
4521   void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4522 
4523   // Increment scalar by multiple of predicate constraint element count.
4524   void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4525 
4526   // Increment scalar by multiple of predicate constraint element count.
4527   void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4528 
4529   // Increment vector by multiple of predicate constraint element count.
4530   void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4531 
4532   // Increment scalar by multiple of predicate constraint element count.
4533   void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4534 
4535   // Increment vector by multiple of predicate constraint element count.
4536   void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4537 
4538   // Increment scalar by active predicate element count.
4539   void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4540 
4541   // Increment vector by active predicate element count.
4542   void incp(const ZRegister& zdn, const PRegister& pg);
4543 
4544   // Increment scalar by multiple of predicate constraint element count.
4545   void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4546 
4547   // Increment vector by multiple of predicate constraint element count.
4548   void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4549 
4550   // Create index starting from and incremented by immediate.
4551   void index(const ZRegister& zd, int start, int step);
4552 
4553   // Create index starting from and incremented by general-purpose register.
4554   void index(const ZRegister& zd, const Register& rn, const Register& rm);
4555 
4556   // Create index starting from general-purpose register and incremented by
4557   // immediate.
4558   void index(const ZRegister& zd, const Register& rn, int imm5);
4559 
4560   // Create index starting from immediate and incremented by general-purpose
4561   // register.
4562   void index(const ZRegister& zd, int imm5, const Register& rm);
4563 
4564   // Insert general-purpose register in shifted vector.
4565   void insr(const ZRegister& zdn, const Register& rm);
4566 
4567   // Insert SIMD&FP scalar register in shifted vector.
4568   void insr(const ZRegister& zdn, const VRegister& vm);
4569 
4570   // Extract element after last to general-purpose register.
4571   void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4572 
4573   // Extract element after last to SIMD&FP scalar register.
4574   void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4575 
4576   // Extract last element to general-purpose register.
4577   void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4578 
4579   // Extract last element to SIMD&FP scalar register.
4580   void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4581 
4582   // Contiguous/gather load bytes to vector.
4583   void ld1b(const ZRegister& zt,
4584             const PRegisterZ& pg,
4585             const SVEMemOperand& addr);
4586 
4587   // Contiguous/gather load halfwords to vector.
4588   void ld1h(const ZRegister& zt,
4589             const PRegisterZ& pg,
4590             const SVEMemOperand& addr);
4591 
4592   // Contiguous/gather load words to vector.
4593   void ld1w(const ZRegister& zt,
4594             const PRegisterZ& pg,
4595             const SVEMemOperand& addr);
4596 
4597   // Contiguous/gather load doublewords to vector.
4598   void ld1d(const ZRegister& zt,
4599             const PRegisterZ& pg,
4600             const SVEMemOperand& addr);
4601 
4602   // TODO: Merge other loads into the SVEMemOperand versions.
4603 
  // Load and broadcast unsigned byte to vector.
  void ld1rb(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Load and broadcast unsigned halfword to vector.
  void ld1rh(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Load and broadcast unsigned word to vector.
  void ld1rw(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Load and broadcast doubleword to vector.
  void ld1rd(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Contiguous load and replicate sixteen bytes.
  void ld1rqb(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate eight halfwords.
  void ld1rqh(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate four words.
  void ld1rqw(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate two doublewords.
  void ld1rqd(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate thirty-two bytes.
  // NOTE(review): the LD1RO* (256-bit replicate) forms are presumably gated
  // on the F64MM extension — confirm CPU feature checks at the definitions.
  void ld1rob(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate sixteen halfwords.
  void ld1roh(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate eight words.
  void ld1row(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load and replicate four doublewords.
  void ld1rod(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Load and broadcast signed byte to vector.
  void ld1rsb(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Load and broadcast signed halfword to vector.
  void ld1rsh(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Load and broadcast signed word to vector.
  void ld1rsw(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous/gather load signed bytes to vector.
  void ld1sb(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Contiguous/gather load signed halfwords to vector.
  void ld1sh(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);

  // Contiguous/gather load signed words to vector.
  void ld1sw(const ZRegister& zt,
             const PRegisterZ& pg,
             const SVEMemOperand& addr);
4693 
4694   // TODO: Merge other loads into the SVEMemOperand versions.
4695 
  // SVE structure loads: each ldN* declaration below fills N destination
  // vectors (zt1..ztN) under predicate control from a single address operand.

  // Contiguous load two-byte structures to two vectors.
  void ld2b(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load two-halfword structures to two vectors.
  void ld2h(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load two-word structures to two vectors.
  void ld2w(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load two-doubleword structures to two vectors.
  void ld2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load three-byte structures to three vectors.
  void ld3b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load three-halfword structures to three vectors.
  void ld3h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load three-word structures to three vectors.
  void ld3w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load three-doubleword structures to three vectors.
  void ld3d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load four-byte structures to four vectors.
  void ld4b(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load four-halfword structures to four vectors.
  void ld4h(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load four-word structures to four vectors.
  void ld4w(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);

  // Contiguous load four-doubleword structures to four vectors.
  void ld4d(const ZRegister& zt1,
            const ZRegister& zt2,
            const ZRegister& zt3,
            const ZRegister& zt4,
            const PRegisterZ& pg,
            const SVEMemOperand& addr);
4779 
  // Contiguous load first-fault unsigned bytes to vector.
  void ldff1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load first-fault unsigned halfwords to vector.
  void ldff1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load first-fault unsigned words to vector.
  void ldff1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load first-fault doublewords to vector.
  void ldff1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load first-fault signed bytes to vector.
  void ldff1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load first-fault signed halfwords to vector.
  void ldff1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load first-fault signed words to vector.
  void ldff1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Gather load first-fault unsigned bytes to vector (vector index).
  void ldff1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const Register& xn,
              const ZRegister& zm);

  // Gather load first-fault unsigned bytes to vector (immediate index).
  void ldff1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const ZRegister& zn,
              int imm5);

  // Gather load first-fault doublewords to vector (vector index).
  void ldff1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const Register& xn,
              const ZRegister& zm);

  // Gather load first-fault doublewords to vector (immediate index).
  void ldff1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const ZRegister& zn,
              int imm5);

  // Gather load first-fault unsigned halfwords to vector (vector index).
  void ldff1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const Register& xn,
              const ZRegister& zm);

  // Gather load first-fault unsigned halfwords to vector (immediate index).
  void ldff1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const ZRegister& zn,
              int imm5);

  // Gather load first-fault signed bytes to vector (vector index).
  void ldff1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const Register& xn,
               const ZRegister& zm);

  // Gather load first-fault signed bytes to vector (immediate index).
  void ldff1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const ZRegister& zn,
               int imm5);

  // Gather load first-fault signed halfwords to vector (vector index).
  void ldff1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const Register& xn,
               const ZRegister& zm);

  // Gather load first-fault signed halfwords to vector (immediate index).
  void ldff1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const ZRegister& zn,
               int imm5);

  // Gather load first-fault signed words to vector (vector index).
  void ldff1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const Register& xn,
               const ZRegister& zm);

  // Gather load first-fault signed words to vector (immediate index).
  void ldff1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const ZRegister& zn,
               int imm5);

  // Gather load first-fault unsigned words to vector (vector index).
  void ldff1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const Register& xn,
              const ZRegister& zm);

  // Gather load first-fault unsigned words to vector (immediate index).
  void ldff1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const ZRegister& zn,
              int imm5);

  // Contiguous load non-fault unsigned bytes to vector (immediate index).
  void ldnf1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault doublewords to vector (immediate index).
  void ldnf1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault unsigned halfwords to vector (immediate
  // index).
  void ldnf1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-fault signed bytes to vector (immediate index).
  void ldnf1sb(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault signed halfwords to vector (immediate index).
  void ldnf1sh(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault signed words to vector (immediate index).
  void ldnf1sw(const ZRegister& zt,
               const PRegisterZ& pg,
               const SVEMemOperand& addr);

  // Contiguous load non-fault unsigned words to vector (immediate index).
  void ldnf1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal bytes to vector.
  void ldnt1b(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal halfwords to vector.
  void ldnt1h(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal words to vector.
  void ldnt1w(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);

  // Contiguous load non-temporal doublewords to vector.
  void ldnt1d(const ZRegister& zt,
              const PRegisterZ& pg,
              const SVEMemOperand& addr);
4954 
  // Load SVE predicate/vector register.
  // (rt is expected to be an SVE P or Z register, per the comment above;
  // confirm any run-time checks at the definition.)
  void ldr(const CPURegister& rt, const SVEMemOperand& addr);

  // Logical shift left by immediate (predicated).
  void lsl(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           int shift);

  // Logical shift left by 64-bit wide elements (predicated).
  void lsl(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Logical shift left by immediate (unpredicated).
  void lsl(const ZRegister& zd, const ZRegister& zn, int shift);

  // Logical shift left by 64-bit wide elements (unpredicated).
  void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Reversed logical shift left by vector (predicated).
  void lslr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Logical shift right by immediate (predicated).
  void lsr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           int shift);

  // Logical shift right by 64-bit wide elements (predicated).
  void lsr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Logical shift right by immediate (unpredicated).
  void lsr(const ZRegister& zd, const ZRegister& zn, int shift);

  // Logical shift right by 64-bit wide elements (unpredicated).
  void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Reversed logical shift right by vector (predicated).
  void lsrr(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);
5005 
  // Bitwise invert predicate.
  void not_(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Bitwise invert predicate, setting the condition flags.
  void nots(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Multiply-add vectors (predicated), writing multiplicand
  // [Zdn = Za + Zdn * Zm].
  void mad(const ZRegister& zdn,
           const PRegisterM& pg,
           const ZRegister& zm,
           const ZRegister& za);

  // Multiply-add vectors (predicated), writing addend
  // [Zda = Zda + Zn * Zm].
  void mla(const ZRegister& zda,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Multiply-subtract vectors (predicated), writing addend
  // [Zda = Zda - Zn * Zm].
  void mls(const ZRegister& zda,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Move predicates (unpredicated).
  void mov(const PRegister& pd, const PRegister& pn);

  // Move predicates (merging).
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterM& pg,
           const PRegisterWithLaneSize& pn);

  // Move predicates (zeroing).
  void mov(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn);

  // Move general-purpose register to vector elements (unpredicated).
  void mov(const ZRegister& zd, const Register& xn);

  // Move SIMD&FP scalar register to vector elements (unpredicated).
  void mov(const ZRegister& zd, const VRegister& vn);

  // Move vector register (unpredicated).
  void mov(const ZRegister& zd, const ZRegister& zn);

  // Move indexed element to vector elements (unpredicated).
  void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);

  // Move general-purpose register to vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);

  // Move SIMD&FP scalar register to vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);

  // Move vector elements (predicated).
  void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Move signed integer immediate to vector elements (predicated).
  void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);

  // Move signed immediate to vector elements (unpredicated).
  void mov(const ZRegister& zd, int imm8, int shift);

  // Move logical bitmask immediate to vector (unpredicated).
  void mov(const ZRegister& zd, uint64_t imm);

  // Move predicate (unpredicated), setting the condition flags.
  void movs(const PRegister& pd, const PRegister& pn);

  // Move predicates (zeroing), setting the condition flags.
  void movs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn);

  // Move prefix (predicated).
  void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);

  // Move prefix (unpredicated).
  void movprfx(const ZRegister& zd, const ZRegister& zn);

  // Multiply-subtract vectors (predicated), writing multiplicand
  // [Zdn = Za - Zdn * Zm].
  void msb(const ZRegister& zdn,
           const PRegisterM& pg,
           const ZRegister& zm,
           const ZRegister& za);

  // Multiply vectors (predicated).
  void mul(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Multiply by immediate (unpredicated).
  void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5109 
  // Bitwise NAND predicates.
  void nand(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise NAND predicates, setting the condition flags.
  void nands(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Negate (predicated).
  void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Bitwise NOR predicates.
  void nor(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise NOR predicates, setting the condition flags.
  void nors(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise invert vector (predicated).
  void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Bitwise OR inverted predicate.
  void orn(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR inverted predicate, setting the condition flags.
  void orns(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR with inverted immediate (unpredicated).
  void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR predicate.
  void orr(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise OR vectors (predicated).
  void orr(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise OR with immediate (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise OR vectors (unpredicated).
  void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise OR predicate, setting the condition flags.
  void orrs(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise OR reduction to scalar.
  void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Set all predicate elements to false.
  void pfalse(const PRegisterWithLaneSize& pd);

  // Set the first active predicate element to true.
  void pfirst(const PRegisterWithLaneSize& pd,
              const PRegister& pg,
              const PRegisterWithLaneSize& pn);

  // Find next active predicate.
  void pnext(const PRegisterWithLaneSize& pd,
             const PRegister& pg,
             const PRegisterWithLaneSize& pn);

  // Prefetch bytes.
  void prfb(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch halfwords.
  void prfh(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch words.
  void prfw(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Prefetch doublewords.
  void prfd(PrefetchOperation prfop,
            const PRegister& pg,
            const SVEMemOperand& addr);

  // Set condition flags for predicate.
  void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);

  // Initialise predicate from named constraint.
  void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Initialise predicate from named constraint, setting the condition flags.
  void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);

  // Unpack and widen the high half of predicate.
  void punpkhi(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Unpack and widen the low half of predicate.
  void punpklo(const PRegisterWithLaneSize& pd,
               const PRegisterWithLaneSize& pn);

  // Reverse bits (predicated).
  void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Read the first-fault register.
  void rdffr(const PRegisterWithLaneSize& pd);

  // Return predicate of successfully loaded elements.
  void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Return predicate of successfully loaded elements, setting the condition
  // flags.
  void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);

  // Read multiple of vector register size to scalar register.
  void rdvl(const Register& xd, int imm6);

  // Reverse all elements in a predicate.
  void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);

  // Reverse all elements in a vector (unpredicated).
  void rev(const ZRegister& zd, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Reverse bytes / halfwords / words within elements (predicated).
  void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5261 
  // Signed absolute difference (predicated).
  void sabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed add reduction to scalar.
  void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);

  // Signed integer convert to floating-point (predicated).
  void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Signed divide (predicated).
  void sdiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed reversed divide (predicated).
  void sdivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Signed dot product by indexed quadtuplet.
  void sdot(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Signed dot product.
  void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Conditionally select elements from two predicates.
  void sel(const PRegisterWithLaneSize& pd,
           const PRegister& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Conditionally select elements from two vectors.
  void sel(const ZRegister& zd,
           const PRegister& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Initialise the first-fault register to all true.
  void setffr();

  // Signed maximum vectors (predicated).
  void smax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed maximum with immediate (unpredicated).
  void smax(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed maximum reduction to scalar.
  void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed minimum vectors (predicated).
  void smin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Signed minimum with immediate (unpredicated).
  void smin(const ZRegister& zd, const ZRegister& zn, int imm8);

  // Signed minimum reduction to scalar.
  void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Signed multiply returning high half (predicated).
  void smulh(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Splice two vectors under predicate control.
  void splice(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Splice two vectors under predicate control (constructive).
  // NOTE(review): presumably the non-destructive SVE2 SPLICE encoding —
  // confirm at the definition.
  void splice_con(const ZRegister& zd,
                  const PRegister& pg,
                  const ZRegister& zn,
                  const ZRegister& zm);

  // Signed saturating add vectors (unpredicated).
  void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Signed saturating add immediate (unpredicated).
  void sqadd(const ZRegister& zd,
             const ZRegister& zn,
             int imm8,
             int shift = -1);
5360 
5361   // Signed saturating decrement scalar by multiple of 8-bit predicate
5362   // constraint element count.
5363   void sqdecb(const Register& xd,
5364               const Register& wn,
5365               int pattern,
5366               int multiplier);
5367 
  // Signed saturating decrement scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 64-bit predicate
  // constraint element count.
  void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 16-bit predicate
  // constraint element count.
  void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating decrement scalar by active predicate element count.
  void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating decrement vector by active predicate element count.
  void sqdecp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating decrement scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating decrement vector by multiple of 32-bit predicate
  // constraint element count.
  void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 8-bit predicate
  // constraint element count.
  void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5438 
5439   // Signed saturating increment scalar by multiple of 64-bit predicate
5440   // constraint element count.
5441   void sqincd(const Register& xd,
5442               const Register& wn,
5443               int pattern,
5444               int multiplier);
5445 
  // Signed saturating increment scalar by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 64-bit predicate
  // constraint element count.
  void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 16-bit predicate
  // constraint element count.
  void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xd,
              const PRegisterWithLaneSize& pg,
              const Register& wn);

  // Signed saturating increment scalar by active predicate element count.
  void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);

  // Signed saturating increment vector by active predicate element count.
  void sqincp(const ZRegister& zdn, const PRegister& pg);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& xd,
              const Register& wn,
              int pattern = SVE_ALL,
              int multiplier = 1);

  // Signed saturating increment scalar by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating increment vector by multiple of 32-bit predicate
  // constraint element count.
  void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Signed saturating subtract vectors (unpredicated).
  void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5497 
5498   // Signed saturating subtract immediate (unpredicated).
5499   void sqsub(const ZRegister& zd,
5500              const ZRegister& zn,
5501              int imm8,
5502              int shift = -1);
5503 
5504   // Contiguous/scatter store bytes from vector.
5505   void st1b(const ZRegister& zt,
5506             const PRegister& pg,
5507             const SVEMemOperand& addr);
5508 
5509   // Contiguous/scatter store halfwords from vector.
5510   void st1h(const ZRegister& zt,
5511             const PRegister& pg,
5512             const SVEMemOperand& addr);
5513 
5514   // Contiguous/scatter store words from vector.
5515   void st1w(const ZRegister& zt,
5516             const PRegister& pg,
5517             const SVEMemOperand& addr);
5518 
5519   // Contiguous/scatter store doublewords from vector.
5520   void st1d(const ZRegister& zt,
5521             const PRegister& pg,
5522             const SVEMemOperand& addr);
5523 
5524   // Contiguous store two-byte structures from two vectors.
5525   void st2b(const ZRegister& zt1,
5526             const ZRegister& zt2,
5527             const PRegister& pg,
5528             const SVEMemOperand& addr);
5529 
5530   // Contiguous store two-halfword structures from two vectors.
5531   void st2h(const ZRegister& zt1,
5532             const ZRegister& zt2,
5533             const PRegister& pg,
5534             const SVEMemOperand& addr);
5535 
5536   // Contiguous store two-word structures from two vectors.
5537   void st2w(const ZRegister& zt1,
5538             const ZRegister& zt2,
5539             const PRegister& pg,
5540             const SVEMemOperand& addr);
5541 
  // Contiguous store two-doubleword structures from two vectors.
  void st2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);
5547 
5548   // Contiguous store three-byte structures from three vectors.
5549   void st3b(const ZRegister& zt1,
5550             const ZRegister& zt2,
5551             const ZRegister& zt3,
5552             const PRegister& pg,
5553             const SVEMemOperand& addr);
5554 
5555   // Contiguous store three-halfword structures from three vectors.
5556   void st3h(const ZRegister& zt1,
5557             const ZRegister& zt2,
5558             const ZRegister& zt3,
5559             const PRegister& pg,
5560             const SVEMemOperand& addr);
5561 
5562   // Contiguous store three-word structures from three vectors.
5563   void st3w(const ZRegister& zt1,
5564             const ZRegister& zt2,
5565             const ZRegister& zt3,
5566             const PRegister& pg,
5567             const SVEMemOperand& addr);
5568 
5569   // Contiguous store three-doubleword structures from three vectors.
5570   void st3d(const ZRegister& zt1,
5571             const ZRegister& zt2,
5572             const ZRegister& zt3,
5573             const PRegister& pg,
5574             const SVEMemOperand& addr);
5575 
5576   // Contiguous store four-byte structures from four vectors.
5577   void st4b(const ZRegister& zt1,
5578             const ZRegister& zt2,
5579             const ZRegister& zt3,
5580             const ZRegister& zt4,
5581             const PRegister& pg,
5582             const SVEMemOperand& addr);
5583 
5584   // Contiguous store four-halfword structures from four vectors.
5585   void st4h(const ZRegister& zt1,
5586             const ZRegister& zt2,
5587             const ZRegister& zt3,
5588             const ZRegister& zt4,
5589             const PRegister& pg,
5590             const SVEMemOperand& addr);
5591 
5592   // Contiguous store four-word structures from four vectors.
5593   void st4w(const ZRegister& zt1,
5594             const ZRegister& zt2,
5595             const ZRegister& zt3,
5596             const ZRegister& zt4,
5597             const PRegister& pg,
5598             const SVEMemOperand& addr);
5599 
5600   // Contiguous store four-doubleword structures from four vectors.
5601   void st4d(const ZRegister& zt1,
5602             const ZRegister& zt2,
5603             const ZRegister& zt3,
5604             const ZRegister& zt4,
5605             const PRegister& pg,
5606             const SVEMemOperand& addr);
5607 
5608   // Contiguous store non-temporal bytes from vector.
5609   void stnt1b(const ZRegister& zt,
5610               const PRegister& pg,
5611               const SVEMemOperand& addr);
5612 
5613   // Contiguous store non-temporal halfwords from vector.
5614   void stnt1h(const ZRegister& zt,
5615               const PRegister& pg,
5616               const SVEMemOperand& addr);
5617 
5618   // Contiguous store non-temporal words from vector.
5619   void stnt1w(const ZRegister& zt,
5620               const PRegister& pg,
5621               const SVEMemOperand& addr);
5622 
5623   // Contiguous store non-temporal doublewords from vector.
5624   void stnt1d(const ZRegister& zt,
5625               const PRegister& pg,
5626               const SVEMemOperand& addr);
5627 
5628   // Store SVE predicate/vector register.
5629   void str(const CPURegister& rt, const SVEMemOperand& addr);
5630 
5631   // Subtract vectors (predicated).
5632   void sub(const ZRegister& zd,
5633            const PRegisterM& pg,
5634            const ZRegister& zn,
5635            const ZRegister& zm);
5636 
5637   // Subtract vectors (unpredicated).
5638   void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5639 
5640   // Subtract immediate (unpredicated).
5641   void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5642 
5643   // Reversed subtract vectors (predicated).
5644   void subr(const ZRegister& zd,
5645             const PRegisterM& pg,
5646             const ZRegister& zn,
5647             const ZRegister& zm);
5648 
5649   // Reversed subtract from immediate (unpredicated).
5650   void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5651 
  // Signed unpack and extend the high half of a vector.
  void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5654 
  // Signed unpack and extend the low half of a vector.
  void sunpklo(const ZRegister& zd, const ZRegister& zn);
5657 
5658   // Signed byte extend (predicated).
5659   void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5660 
5661   // Signed halfword extend (predicated).
5662   void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5663 
5664   // Signed word extend (predicated).
5665   void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5666 
5667   // Programmable table lookup/permute using vector of indices into a
5668   // vector.
5669   void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5670 
5671   // Interleave even or odd elements from two predicates.
5672   void trn1(const PRegisterWithLaneSize& pd,
5673             const PRegisterWithLaneSize& pn,
5674             const PRegisterWithLaneSize& pm);
5675 
5676   // Interleave even or odd elements from two vectors.
5677   void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5678 
5679   // Interleave even or odd elements from two predicates.
5680   void trn2(const PRegisterWithLaneSize& pd,
5681             const PRegisterWithLaneSize& pn,
5682             const PRegisterWithLaneSize& pm);
5683 
5684   // Interleave even or odd elements from two vectors.
5685   void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5686 
5687   // Unsigned absolute difference (predicated).
5688   void uabd(const ZRegister& zd,
5689             const PRegisterM& pg,
5690             const ZRegister& zn,
5691             const ZRegister& zm);
5692 
5693   // Unsigned add reduction to scalar.
5694   void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5695 
5696   // Unsigned integer convert to floating-point (predicated).
5697   void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5698 
5699   // Unsigned divide (predicated).
5700   void udiv(const ZRegister& zd,
5701             const PRegisterM& pg,
5702             const ZRegister& zn,
5703             const ZRegister& zm);
5704 
5705   // Unsigned reversed divide (predicated).
5706   void udivr(const ZRegister& zd,
5707              const PRegisterM& pg,
5708              const ZRegister& zn,
5709              const ZRegister& zm);
5710 
5711   // Unsigned dot product by indexed quadtuplet.
5712   void udot(const ZRegister& zda,
5713             const ZRegister& zn,
5714             const ZRegister& zm,
5715             int index);
5716 
5717   // Unsigned dot product.
5718   void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5719 
5720   // Unsigned maximum vectors (predicated).
5721   void umax(const ZRegister& zd,
5722             const PRegisterM& pg,
5723             const ZRegister& zn,
5724             const ZRegister& zm);
5725 
5726   // Unsigned maximum with immediate (unpredicated).
5727   void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5728 
5729   // Unsigned maximum reduction to scalar.
5730   void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5731 
5732   // Unsigned minimum vectors (predicated).
5733   void umin(const ZRegister& zd,
5734             const PRegisterM& pg,
5735             const ZRegister& zn,
5736             const ZRegister& zm);
5737 
5738   // Unsigned minimum with immediate (unpredicated).
5739   void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5740 
5741   // Unsigned minimum reduction to scalar.
5742   void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5743 
5744   // Unsigned multiply returning high half (predicated).
5745   void umulh(const ZRegister& zd,
5746              const PRegisterM& pg,
5747              const ZRegister& zn,
5748              const ZRegister& zm);
5749 
5750   // Unsigned saturating add vectors (unpredicated).
5751   void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5752 
5753   // Unsigned saturating add immediate (unpredicated).
5754   void uqadd(const ZRegister& zd,
5755              const ZRegister& zn,
5756              int imm8,
5757              int shift = -1);
5758 
5759   // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5760   // constraint element count.
5761   void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5762 
5763   // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5764   // constraint element count.
5765   void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5766 
5767   // Unsigned saturating decrement vector by multiple of 64-bit predicate
5768   // constraint element count.
5769   void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5770 
5771   // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5772   // constraint element count.
5773   void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5774 
5775   // Unsigned saturating decrement vector by multiple of 16-bit predicate
5776   // constraint element count.
5777   void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5778 
5779   // Unsigned saturating decrement scalar by active predicate element count.
5780   void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5781 
5782   // Unsigned saturating decrement vector by active predicate element count.
5783   void uqdecp(const ZRegister& zdn, const PRegister& pg);
5784 
5785   // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5786   // constraint element count.
5787   void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5788 
5789   // Unsigned saturating decrement vector by multiple of 32-bit predicate
5790   // constraint element count.
5791   void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5792 
5793   // Unsigned saturating increment scalar by multiple of 8-bit predicate
5794   // constraint element count.
5795   void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5796 
5797   // Unsigned saturating increment scalar by multiple of 64-bit predicate
5798   // constraint element count.
5799   void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5800 
5801   // Unsigned saturating increment vector by multiple of 64-bit predicate
5802   // constraint element count.
5803   void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5804 
5805   // Unsigned saturating increment scalar by multiple of 16-bit predicate
5806   // constraint element count.
5807   void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5808 
5809   // Unsigned saturating increment vector by multiple of 16-bit predicate
5810   // constraint element count.
5811   void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5812 
5813   // Unsigned saturating increment scalar by active predicate element count.
5814   void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5815 
5816   // Unsigned saturating increment vector by active predicate element count.
5817   void uqincp(const ZRegister& zdn, const PRegister& pg);
5818 
5819   // Unsigned saturating increment scalar by multiple of 32-bit predicate
5820   // constraint element count.
5821   void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5822 
5823   // Unsigned saturating increment vector by multiple of 32-bit predicate
5824   // constraint element count.
5825   void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5826 
5827   // Unsigned saturating subtract vectors (unpredicated).
5828   void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5829 
5830   // Unsigned saturating subtract immediate (unpredicated).
5831   void uqsub(const ZRegister& zd,
5832              const ZRegister& zn,
5833              int imm8,
5834              int shift = -1);
5835 
  // Unsigned unpack and extend the high half of a vector.
  void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5838 
  // Unsigned unpack and extend the low half of a vector.
  void uunpklo(const ZRegister& zd, const ZRegister& zn);
5841 
5842   // Unsigned byte extend (predicated).
5843   void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5844 
5845   // Unsigned halfword extend (predicated).
5846   void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5847 
5848   // Unsigned word extend (predicated).
5849   void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5850 
5851   // Concatenate even or odd elements from two predicates.
5852   void uzp1(const PRegisterWithLaneSize& pd,
5853             const PRegisterWithLaneSize& pn,
5854             const PRegisterWithLaneSize& pm);
5855 
5856   // Concatenate even or odd elements from two vectors.
5857   void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5858 
5859   // Concatenate even or odd elements from two predicates.
5860   void uzp2(const PRegisterWithLaneSize& pd,
5861             const PRegisterWithLaneSize& pn,
5862             const PRegisterWithLaneSize& pm);
5863 
5864   // Concatenate even or odd elements from two vectors.
5865   void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5866 
5867   // While incrementing signed scalar less than or equal to scalar.
5868   void whilele(const PRegisterWithLaneSize& pd,
5869                const Register& rn,
5870                const Register& rm);
5871 
5872   // While incrementing unsigned scalar lower than scalar.
5873   void whilelo(const PRegisterWithLaneSize& pd,
5874                const Register& rn,
5875                const Register& rm);
5876 
5877   // While incrementing unsigned scalar lower or same as scalar.
5878   void whilels(const PRegisterWithLaneSize& pd,
5879                const Register& rn,
5880                const Register& rm);
5881 
5882   // While incrementing signed scalar less than scalar.
5883   void whilelt(const PRegisterWithLaneSize& pd,
5884                const Register& rn,
5885                const Register& rm);
5886 
5887   // Write the first-fault register.
5888   void wrffr(const PRegisterWithLaneSize& pn);
5889 
5890   // Interleave elements from two half predicates.
5891   void zip1(const PRegisterWithLaneSize& pd,
5892             const PRegisterWithLaneSize& pn,
5893             const PRegisterWithLaneSize& pm);
5894 
5895   // Interleave elements from two half vectors.
5896   void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5897 
5898   // Interleave elements from two half predicates.
5899   void zip2(const PRegisterWithLaneSize& pd,
5900             const PRegisterWithLaneSize& pn,
5901             const PRegisterWithLaneSize& pm);
5902 
5903   // Interleave elements from two half vectors.
5904   void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5905 
5906   // Add with carry long (bottom).
5907   void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5908 
5909   // Add with carry long (top).
5910   void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5911 
5912   // Add narrow high part (bottom).
5913   void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5914 
5915   // Add narrow high part (top).
5916   void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5917 
5918   // Add pairwise.
5919   void addp(const ZRegister& zd,
5920             const PRegisterM& pg,
5921             const ZRegister& zn,
5922             const ZRegister& zm);
5923 
5924   // Bitwise clear and exclusive OR.
5925   void bcax(const ZRegister& zd,
5926             const ZRegister& zn,
5927             const ZRegister& zm,
5928             const ZRegister& zk);
5929 
5930   // Scatter lower bits into positions selected by bitmask.
5931   void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5932 
5933   // Gather lower bits from positions selected by bitmask.
5934   void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5935 
5936   // Group bits to right or left as selected by bitmask.
5937   void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5938 
5939   // Bitwise select.
5940   void bsl(const ZRegister& zd,
5941            const ZRegister& zn,
5942            const ZRegister& zm,
5943            const ZRegister& zk);
5944 
5945   // Bitwise select with first input inverted.
5946   void bsl1n(const ZRegister& zd,
5947              const ZRegister& zn,
5948              const ZRegister& zm,
5949              const ZRegister& zk);
5950 
5951   // Bitwise select with second input inverted.
5952   void bsl2n(const ZRegister& zd,
5953              const ZRegister& zn,
5954              const ZRegister& zm,
5955              const ZRegister& zk);
5956 
5957   // Complex integer add with rotate.
5958   void cadd(const ZRegister& zd,
5959             const ZRegister& zn,
5960             const ZRegister& zm,
5961             int rot);
5962 
5963   // Complex integer dot product (indexed).
5964   void cdot(const ZRegister& zda,
5965             const ZRegister& zn,
5966             const ZRegister& zm,
5967             int index,
5968             int rot);
5969 
5970   // Complex integer dot product.
5971   void cdot(const ZRegister& zda,
5972             const ZRegister& zn,
5973             const ZRegister& zm,
5974             int rot);
5975 
5976   // Complex integer multiply-add with rotate (indexed).
5977   void cmla(const ZRegister& zda,
5978             const ZRegister& zn,
5979             const ZRegister& zm,
5980             int index,
5981             int rot);
5982 
5983   // Complex integer multiply-add with rotate.
5984   void cmla(const ZRegister& zda,
5985             const ZRegister& zn,
5986             const ZRegister& zm,
5987             int rot);
5988 
5989   // Bitwise exclusive OR of three vectors.
5990   void eor3(const ZRegister& zd,
5991             const ZRegister& zn,
5992             const ZRegister& zm,
5993             const ZRegister& zk);
5994 
5995   // Interleaving exclusive OR (bottom, top).
5996   void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5997 
5998   // Interleaving exclusive OR (top, bottom).
5999   void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6000 
6001   // Floating-point add pairwise.
6002   void faddp(const ZRegister& zd,
6003              const PRegisterM& pg,
6004              const ZRegister& zn,
6005              const ZRegister& zm);
6006 
6007   // Floating-point up convert long (top, predicated).
6008   void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6009 
6010   // Floating-point down convert and narrow (top, predicated).
6011   void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6012 
6013   // Floating-point down convert, rounding to odd (predicated).
6014   void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6015 
6016   // Floating-point down convert, rounding to odd (top, predicated).
6017   void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6018 
6019   // Floating-point base 2 logarithm as integer.
6020   void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6021 
6022   // Floating-point maximum number pairwise.
6023   void fmaxnmp(const ZRegister& zd,
6024                const PRegisterM& pg,
6025                const ZRegister& zn,
6026                const ZRegister& zm);
6027 
6028   // Floating-point maximum pairwise.
6029   void fmaxp(const ZRegister& zd,
6030              const PRegisterM& pg,
6031              const ZRegister& zn,
6032              const ZRegister& zm);
6033 
6034   // Floating-point minimum number pairwise.
6035   void fminnmp(const ZRegister& zd,
6036                const PRegisterM& pg,
6037                const ZRegister& zn,
6038                const ZRegister& zm);
6039 
6040   // Floating-point minimum pairwise.
6041   void fminp(const ZRegister& zd,
6042              const PRegisterM& pg,
6043              const ZRegister& zn,
6044              const ZRegister& zm);
6045 
6046   // Half-precision floating-point multiply-add long to single-precision
6047   // (bottom).
6048   void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6049 
6050   // Half-precision floating-point multiply-add long to single-precision
6051   // (top).
6052   void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6053 
6054   // Half-precision floating-point multiply-subtract long from
6055   // single-precision (bottom).
6056   void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6057 
6058   // Half-precision floating-point multiply-subtract long from
6059   // single-precision (top, indexed).
6060   void fmlslt(const ZRegister& zda,
6061               const ZRegister& zn,
6062               const ZRegister& zm,
6063               int index);
6064 
6065   // Half-precision floating-point multiply-add long to single-precision
6066   // (bottom, indexed).
6067   void fmlalb(const ZRegister& zda,
6068               const ZRegister& zn,
6069               const ZRegister& zm,
6070               int index);
6071 
6072   // Half-precision floating-point multiply-add long to single-precision
6073   // (top, indexed).
6074   void fmlalt(const ZRegister& zda,
6075               const ZRegister& zn,
6076               const ZRegister& zm,
6077               int index);
6078 
6079   // Half-precision floating-point multiply-subtract long from
6080   // single-precision (bottom, indexed).
6081   void fmlslb(const ZRegister& zda,
6082               const ZRegister& zn,
6083               const ZRegister& zm,
6084               int index);
6085 
6086   // Half-precision floating-point multiply-subtract long from
6087   // single-precision (top).
6088   void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6089 
6090   // Count matching elements in vector.
6091   void histcnt(const ZRegister& zd,
6092                const PRegisterZ& pg,
6093                const ZRegister& zn,
6094                const ZRegister& zm);
6095 
6096   // Count matching elements in vector segments.
6097   void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6098 
6099   // Gather load non-temporal signed bytes.
6100   void ldnt1sb(const ZRegister& zt,
6101                const PRegisterZ& pg,
6102                const SVEMemOperand& addr);
6103 
6104   // Gather load non-temporal signed halfwords.
6105   void ldnt1sh(const ZRegister& zt,
6106                const PRegisterZ& pg,
6107                const SVEMemOperand& addr);
6108 
6109   // Gather load non-temporal signed words.
6110   void ldnt1sw(const ZRegister& zt,
6111                const PRegisterZ& pg,
6112                const SVEMemOperand& addr);
6113 
6114   // Detect any matching elements, setting the condition flags.
6115   void match(const PRegisterWithLaneSize& pd,
6116              const PRegisterZ& pg,
6117              const ZRegister& zn,
6118              const ZRegister& zm);
6119 
6120   // Multiply-add to accumulator (indexed).
6121   void mla(const ZRegister& zda,
6122            const ZRegister& zn,
6123            const ZRegister& zm,
6124            int index);
6125 
6126   // Multiply-subtract from accumulator (indexed).
6127   void mls(const ZRegister& zda,
6128            const ZRegister& zn,
6129            const ZRegister& zm,
6130            int index);
6131 
6132   // Multiply (indexed).
6133   void mul(const ZRegister& zd,
6134            const ZRegister& zn,
6135            const ZRegister& zm,
6136            int index);
6137 
6138   // Multiply vectors (unpredicated).
6139   void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6140 
6141   // Bitwise inverted select.
6142   void nbsl(const ZRegister& zd,
6143             const ZRegister& zn,
6144             const ZRegister& zm,
6145             const ZRegister& zk);
6146 
6147   // Detect no matching elements, setting the condition flags.
6148   void nmatch(const PRegisterWithLaneSize& pd,
6149               const PRegisterZ& pg,
6150               const ZRegister& zn,
6151               const ZRegister& zm);
6152 
6153   // Polynomial multiply vectors (unpredicated).
6154   void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6155 
6156   // Polynomial multiply long (bottom).
6157   void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6158 
6159   // Polynomial multiply long (top).
6160   void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6161 
6162   // Rounding add narrow high part (bottom).
6163   void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6164 
6165   // Rounding add narrow high part (top).
6166   void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6167 
6168   // Rounding shift right narrow by immediate (bottom).
6169   void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6170 
6171   // Rounding shift right narrow by immediate (top).
6172   void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6173 
6174   // Rounding subtract narrow high part (bottom).
6175   void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6176 
6177   // Rounding subtract narrow high part (top).
6178   void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6179 
6180   // Signed absolute difference and accumulate.
6181   void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6182 
6183   // Signed absolute difference and accumulate long (bottom).
6184   void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6185 
  // NOTE(review): the declarations from here down to sudot() appear to be
  // SVE2 (and related) vector instructions — confirm the CPU feature each one
  // requires against the assembler implementation. Operand convention used
  // throughout, as reflected in the existing comments:
  //   zd  = destination vector; zda = destination that also accumulates.
  //   zn, zm = source vectors; pg = governing predicate (PRegisterM —
  //   presumably the merging form; verify in the predicate register types).
  //   "(bottom)"/"(top)" mark the two half-width element variants of
  //   widening/narrowing ops — confirm exact element selection in the ISA
  //   reference.
6186   // Signed absolute difference and accumulate long (top).
6187   void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6188 
6189   // Signed absolute difference long (bottom).
6190   void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6191 
6192   // Signed absolute difference long (top).
6193   void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6194 
6195   // Signed add and accumulate long pairwise.
6196   void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6197 
6198   // Signed add long (bottom).
6199   void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6200 
6201   // Signed add long (bottom + top).
6202   void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6203 
6204   // Signed add long (top).
6205   void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6206 
6207   // Signed add wide (bottom).
6208   void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6209 
6210   // Signed add wide (top).
6211   void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6212 
6213   // Subtract with carry long (bottom).
6214   void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6215 
6216   // Subtract with carry long (top).
6217   void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6218 
6219   // Signed halving addition.
6220   void shadd(const ZRegister& zd,
6221              const PRegisterM& pg,
6222              const ZRegister& zn,
6223              const ZRegister& zm);
6224 
6225   // Shift right narrow by immediate (bottom).
6226   void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6227 
6228   // Shift right narrow by immediate (top).
6229   void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6230 
6231   // Signed halving subtract.
6232   void shsub(const ZRegister& zd,
6233              const PRegisterM& pg,
6234              const ZRegister& zn,
6235              const ZRegister& zm);
6236 
6237   // Signed halving subtract reversed vectors.
6238   void shsubr(const ZRegister& zd,
6239               const PRegisterM& pg,
6240               const ZRegister& zn,
6241               const ZRegister& zm);
6242 
6243   // Shift left and insert (immediate).
6244   void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6245 
6246   // Signed maximum pairwise.
6247   void smaxp(const ZRegister& zd,
6248              const PRegisterM& pg,
6249              const ZRegister& zn,
6250              const ZRegister& zm);
6251 
6252   // Signed minimum pairwise.
6253   void sminp(const ZRegister& zd,
6254              const PRegisterM& pg,
6255              const ZRegister& zn,
6256              const ZRegister& zm);
6257 
6258   // Signed multiply-add long to accumulator (bottom, indexed).
6259   void smlalb(const ZRegister& zda,
6260               const ZRegister& zn,
6261               const ZRegister& zm,
6262               int index);
6263 
6264   // Signed multiply-add long to accumulator (bottom).
6265   void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6266 
6267   // Signed multiply-add long to accumulator (top, indexed).
6268   void smlalt(const ZRegister& zda,
6269               const ZRegister& zn,
6270               const ZRegister& zm,
6271               int index);
6272 
6273   // Signed multiply-add long to accumulator (top).
6274   void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6275 
6276   // Signed multiply-subtract long from accumulator (bottom, indexed).
6277   void smlslb(const ZRegister& zda,
6278               const ZRegister& zn,
6279               const ZRegister& zm,
6280               int index);
6281 
6282   // Signed multiply-subtract long from accumulator (bottom).
6283   void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6284 
6285   // Signed multiply-subtract long from accumulator (top, indexed).
6286   void smlslt(const ZRegister& zda,
6287               const ZRegister& zn,
6288               const ZRegister& zm,
6289               int index);
6290 
6291   // Signed multiply-subtract long from accumulator (top).
6292   void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6293 
6294   // Signed multiply returning high half (unpredicated).
6295   void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6296 
6297   // Signed multiply long (bottom, indexed).
6298   void smullb(const ZRegister& zd,
6299               const ZRegister& zn,
6300               const ZRegister& zm,
6301               int index);
6302 
6303   // Signed multiply long (bottom).
6304   void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6305 
6306   // Signed multiply long (top, indexed).
6307   void smullt(const ZRegister& zd,
6308               const ZRegister& zn,
6309               const ZRegister& zm,
6310               int index);
6311 
6312   // Signed multiply long (top).
6313   void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6314 
6315   // Signed saturating absolute value.
6316   void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6317 
6318   // Signed saturating addition (predicated).
6319   void sqadd(const ZRegister& zd,
6320              const PRegisterM& pg,
6321              const ZRegister& zn,
6322              const ZRegister& zm);
6323 
6324   // Saturating complex integer add with rotate.
6325   void sqcadd(const ZRegister& zd,
6326               const ZRegister& zn,
6327               const ZRegister& zm,
6328               int rot);
6329 
  // NOTE(review): in the indexed overloads below, `index` selects a single
  // element of zm to broadcast; the valid index range depends on the lane
  // size and is presumably validated by the encoder — confirm in the
  // implementation. `rot` on the complex-arithmetic forms is a rotation
  // selector (likely 0/90/180/270 degrees — verify against the ISA manual).
6330   // Signed saturating doubling multiply-add long to accumulator (bottom,
6331   // indexed).
6332   void sqdmlalb(const ZRegister& zda,
6333                 const ZRegister& zn,
6334                 const ZRegister& zm,
6335                 int index);
6336 
6337   // Signed saturating doubling multiply-add long to accumulator (bottom).
6338   void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6339 
6340   // Signed saturating doubling multiply-add long to accumulator (bottom x
6341   // top).
6342   void sqdmlalbt(const ZRegister& zda,
6343                  const ZRegister& zn,
6344                  const ZRegister& zm);
6345 
6346   // Signed saturating doubling multiply-add long to accumulator (top,
6347   // indexed).
6348   void sqdmlalt(const ZRegister& zda,
6349                 const ZRegister& zn,
6350                 const ZRegister& zm,
6351                 int index);
6352 
6353   // Signed saturating doubling multiply-add long to accumulator (top).
6354   void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6355 
6356   // Signed saturating doubling multiply-subtract long from accumulator
6357   // (bottom, indexed).
6358   void sqdmlslb(const ZRegister& zda,
6359                 const ZRegister& zn,
6360                 const ZRegister& zm,
6361                 int index);
6362 
6363   // Signed saturating doubling multiply-subtract long from accumulator
6364   // (bottom).
6365   void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6366 
6367   // Signed saturating doubling multiply-subtract long from accumulator
6368   // (bottom x top).
6369   void sqdmlslbt(const ZRegister& zda,
6370                  const ZRegister& zn,
6371                  const ZRegister& zm);
6372 
6373   // Signed saturating doubling multiply-subtract long from accumulator
6374   // (top, indexed).
6375   void sqdmlslt(const ZRegister& zda,
6376                 const ZRegister& zn,
6377                 const ZRegister& zm,
6378                 int index);
6379 
6380   // Signed saturating doubling multiply-subtract long from accumulator
6381   // (top).
6382   void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6383 
6384   // Signed saturating doubling multiply high (indexed).
6385   void sqdmulh(const ZRegister& zd,
6386                const ZRegister& zn,
6387                const ZRegister& zm,
6388                int index);
6389 
6390   // Signed saturating doubling multiply high (unpredicated).
6391   void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6392 
6393   // Signed saturating doubling multiply long (bottom, indexed).
6394   void sqdmullb(const ZRegister& zd,
6395                 const ZRegister& zn,
6396                 const ZRegister& zm,
6397                 int index);
6398 
6399   // Signed saturating doubling multiply long (bottom).
6400   void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6401 
6402   // Signed saturating doubling multiply long (top, indexed).
6403   void sqdmullt(const ZRegister& zd,
6404                 const ZRegister& zn,
6405                 const ZRegister& zm,
6406                 int index);
6407 
6408   // Signed saturating doubling multiply long (top).
6409   void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6410 
6411   // Signed saturating negate.
6412   void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6413 
6414   // Saturating rounding doubling complex integer multiply-add high with
6415   // rotate (indexed).
6416   void sqrdcmlah(const ZRegister& zda,
6417                  const ZRegister& zn,
6418                  const ZRegister& zm,
6419                  int index,
6420                  int rot);
6421 
6422   // Saturating rounding doubling complex integer multiply-add high with
6423   // rotate.
6424   void sqrdcmlah(const ZRegister& zda,
6425                  const ZRegister& zn,
6426                  const ZRegister& zm,
6427                  int rot);
6428 
6429   // Signed saturating rounding doubling multiply-add high to accumulator
6430   // (indexed).
6431   void sqrdmlah(const ZRegister& zda,
6432                 const ZRegister& zn,
6433                 const ZRegister& zm,
6434                 int index);
6435 
6436   // Signed saturating rounding doubling multiply-add high to accumulator
6437   // (unpredicated).
6438   void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6439 
6440   // Signed saturating rounding doubling multiply-subtract high from
6441   // accumulator (indexed).
6442   void sqrdmlsh(const ZRegister& zda,
6443                 const ZRegister& zn,
6444                 const ZRegister& zm,
6445                 int index);
6446 
6447   // Signed saturating rounding doubling multiply-subtract high from
6448   // accumulator (unpredicated).
6449   void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6450 
6451   // Signed saturating rounding doubling multiply high (indexed).
6452   void sqrdmulh(const ZRegister& zd,
6453                 const ZRegister& zn,
6454                 const ZRegister& zm,
6455                 int index);
6456 
6457   // Signed saturating rounding doubling multiply high (unpredicated).
6458   void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6459 
6460   // Signed saturating rounding shift left by vector (predicated).
6461   void sqrshl(const ZRegister& zd,
6462               const PRegisterM& pg,
6463               const ZRegister& zn,
6464               const ZRegister& zm);
6465 
6466   // Signed saturating rounding shift left reversed vectors (predicated).
6467   void sqrshlr(const ZRegister& zd,
6468                const PRegisterM& pg,
6469                const ZRegister& zn,
6470                const ZRegister& zm);
6471 
6472   // Signed saturating rounding shift right narrow by immediate (bottom).
6473   void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6474 
6475   // Signed saturating rounding shift right narrow by immediate (top).
6476   void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6477 
6478   // Signed saturating rounding shift right unsigned narrow by immediate
6479   // (bottom).
6480   void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6481 
6482   // Signed saturating rounding shift right unsigned narrow by immediate
6483   // (top).
6484   void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6485 
6486   // Signed saturating shift left by immediate.
6487   void sqshl(const ZRegister& zd,
6488              const PRegisterM& pg,
6489              const ZRegister& zn,
6490              int shift);
6491 
6492   // Signed saturating shift left by vector (predicated).
6493   void sqshl(const ZRegister& zd,
6494              const PRegisterM& pg,
6495              const ZRegister& zn,
6496              const ZRegister& zm);
6497 
6498   // Signed saturating shift left reversed vectors (predicated).
6499   void sqshlr(const ZRegister& zd,
6500               const PRegisterM& pg,
6501               const ZRegister& zn,
6502               const ZRegister& zm);
6503 
6504   // Signed saturating shift left unsigned by immediate.
6505   void sqshlu(const ZRegister& zd,
6506               const PRegisterM& pg,
6507               const ZRegister& zn,
6508               int shift);
6509 
6510   // Signed saturating shift right narrow by immediate (bottom).
6511   void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6512 
6513   // Signed saturating shift right narrow by immediate (top).
6514   void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6515 
6516   // Signed saturating shift right unsigned narrow by immediate (bottom).
6517   void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6518 
6519   // Signed saturating shift right unsigned narrow by immediate (top).
6520   void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6521 
  // Saturating subtract, narrow/extract, rounding-shift and table-lookup
  // forms. The "r"-suffixed variants ("reversed vectors") presumably compute
  // zm OP zn instead of zn OP zm — confirm in the implementation.
6522   // Signed saturating subtraction (predicated).
6523   void sqsub(const ZRegister& zd,
6524              const PRegisterM& pg,
6525              const ZRegister& zn,
6526              const ZRegister& zm);
6527 
6528   // Signed saturating subtraction reversed vectors (predicated).
6529   void sqsubr(const ZRegister& zd,
6530               const PRegisterM& pg,
6531               const ZRegister& zn,
6532               const ZRegister& zm);
6533 
6534   // Signed saturating extract narrow (bottom).
6535   void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6536 
6537   // Signed saturating extract narrow (top).
6538   void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6539 
6540   // Signed saturating unsigned extract narrow (bottom).
6541   void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6542 
6543   // Signed saturating unsigned extract narrow (top).
6544   void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6545 
6546   // Signed rounding halving addition.
6547   void srhadd(const ZRegister& zd,
6548               const PRegisterM& pg,
6549               const ZRegister& zn,
6550               const ZRegister& zm);
6551 
6552   // Shift right and insert (immediate).
6553   void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6554 
6555   // Signed rounding shift left by vector (predicated).
6556   void srshl(const ZRegister& zd,
6557              const PRegisterM& pg,
6558              const ZRegister& zn,
6559              const ZRegister& zm);
6560 
6561   // Signed rounding shift left reversed vectors (predicated).
6562   void srshlr(const ZRegister& zd,
6563               const PRegisterM& pg,
6564               const ZRegister& zn,
6565               const ZRegister& zm);
6566 
6567   // Signed rounding shift right by immediate.
6568   void srshr(const ZRegister& zd,
6569              const PRegisterM& pg,
6570              const ZRegister& zn,
6571              int shift);
6572 
6573   // Signed rounding shift right and accumulate (immediate).
6574   void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6575 
6576   // Signed shift left long by immediate (bottom).
6577   void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6578 
6579   // Signed shift left long by immediate (top).
6580   void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6581 
6582   // Signed shift right and accumulate (immediate).
6583   void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6584 
6585   // Signed subtract long (bottom).
6586   void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6587 
6588   // Signed subtract long (bottom - top).
6589   void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6590 
6591   // Signed subtract long (top).
6592   void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6593 
6594   // Signed subtract long (top - bottom).
6595   void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6596 
6597   // Signed subtract wide (bottom).
6598   void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6599 
6600   // Signed subtract wide (top).
6601   void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6602 
6603   // Subtract narrow high part (bottom).
6604   void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6605 
6606   // Subtract narrow high part (top).
6607   void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6608 
6609   // Signed saturating addition of unsigned value.
6610   void suqadd(const ZRegister& zd,
6611               const PRegisterM& pg,
6612               const ZRegister& zn,
6613               const ZRegister& zm);
6614 
6615   // Programmable table lookup in one or two vector tables (zeroing).
6616   void tbl(const ZRegister& zd,
6617            const ZRegister& zn1,
6618            const ZRegister& zn2,
6619            const ZRegister& zm);
6620 
6621   // Programmable table lookup in single vector table (merging).
6622   void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6623 
  // Unsigned counterparts of the s-prefixed declarations above; signatures
  // mirror the signed forms one-for-one.
6624   // Unsigned absolute difference and accumulate.
6625   void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6626 
6627   // Unsigned absolute difference and accumulate long (bottom).
6628   void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6629 
6630   // Unsigned absolute difference and accumulate long (top).
6631   void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6632 
6633   // Unsigned absolute difference long (bottom).
6634   void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6635 
6636   // Unsigned absolute difference long (top).
6637   void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6638 
6639   // Unsigned add and accumulate long pairwise.
6640   void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6641 
6642   // Unsigned add long (bottom).
6643   void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6644 
6645   // Unsigned add long (top).
6646   void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6647 
6648   // Unsigned add wide (bottom).
6649   void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6650 
6651   // Unsigned add wide (top).
6652   void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6653 
6654   // Unsigned halving addition.
6655   void uhadd(const ZRegister& zd,
6656              const PRegisterM& pg,
6657              const ZRegister& zn,
6658              const ZRegister& zm);
6659 
6660   // Unsigned halving subtract.
6661   void uhsub(const ZRegister& zd,
6662              const PRegisterM& pg,
6663              const ZRegister& zn,
6664              const ZRegister& zm);
6665 
6666   // Unsigned halving subtract reversed vectors.
6667   void uhsubr(const ZRegister& zd,
6668               const PRegisterM& pg,
6669               const ZRegister& zn,
6670               const ZRegister& zm);
6671 
6672   // Unsigned maximum pairwise.
6673   void umaxp(const ZRegister& zd,
6674              const PRegisterM& pg,
6675              const ZRegister& zn,
6676              const ZRegister& zm);
6677 
6678   // Unsigned minimum pairwise.
6679   void uminp(const ZRegister& zd,
6680              const PRegisterM& pg,
6681              const ZRegister& zn,
6682              const ZRegister& zm);
6683 
6684   // Unsigned multiply-add long to accumulator (bottom, indexed).
6685   void umlalb(const ZRegister& zda,
6686               const ZRegister& zn,
6687               const ZRegister& zm,
6688               int index);
6689 
6690   // Unsigned multiply-add long to accumulator (bottom).
6691   void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6692 
6693   // Unsigned multiply-add long to accumulator (top, indexed).
6694   void umlalt(const ZRegister& zda,
6695               const ZRegister& zn,
6696               const ZRegister& zm,
6697               int index);
6698 
6699   // Unsigned multiply-add long to accumulator (top).
6700   void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6701 
6702   // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6703   void umlslb(const ZRegister& zda,
6704               const ZRegister& zn,
6705               const ZRegister& zm,
6706               int index);
6707 
6708   // Unsigned multiply-subtract long from accumulator (bottom).
6709   void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6710 
6711   // Unsigned multiply-subtract long from accumulator (top, indexed).
6712   void umlslt(const ZRegister& zda,
6713               const ZRegister& zn,
6714               const ZRegister& zm,
6715               int index);
6716 
6717   // Unsigned multiply-subtract long from accumulator (top).
6718   void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6719 
6720   // Unsigned multiply returning high half (unpredicated).
6721   void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6722 
6723   // Unsigned multiply long (bottom, indexed).
6724   void umullb(const ZRegister& zd,
6725               const ZRegister& zn,
6726               const ZRegister& zm,
6727               int index);
6728 
6729   // Unsigned multiply long (bottom).
6730   void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6731 
6732   // Unsigned multiply long (top, indexed).
6733   void umullt(const ZRegister& zd,
6734               const ZRegister& zn,
6735               const ZRegister& zm,
6736               int index);
6737 
6738   // Unsigned multiply long (top).
6739   void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6740 
  // Unsigned saturating, rounding, narrowing and shift-accumulate forms;
  // these mirror the sq*/sr*/ss* declarations above.
6741   // Unsigned saturating addition (predicated).
6742   void uqadd(const ZRegister& zd,
6743              const PRegisterM& pg,
6744              const ZRegister& zn,
6745              const ZRegister& zm);
6746 
6747   // Unsigned saturating rounding shift left by vector (predicated).
6748   void uqrshl(const ZRegister& zd,
6749               const PRegisterM& pg,
6750               const ZRegister& zn,
6751               const ZRegister& zm);
6752 
6753   // Unsigned saturating rounding shift left reversed vectors (predicated).
6754   void uqrshlr(const ZRegister& zd,
6755                const PRegisterM& pg,
6756                const ZRegister& zn,
6757                const ZRegister& zm);
6758 
6759   // Unsigned saturating rounding shift right narrow by immediate (bottom).
6760   void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6761 
6762   // Unsigned saturating rounding shift right narrow by immediate (top).
6763   void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6764 
6765   // Unsigned saturating shift left by immediate.
6766   void uqshl(const ZRegister& zd,
6767              const PRegisterM& pg,
6768              const ZRegister& zn,
6769              int shift);
6770 
6771   // Unsigned saturating shift left by vector (predicated).
6772   void uqshl(const ZRegister& zd,
6773              const PRegisterM& pg,
6774              const ZRegister& zn,
6775              const ZRegister& zm);
6776 
6777   // Unsigned saturating shift left reversed vectors (predicated).
6778   void uqshlr(const ZRegister& zd,
6779               const PRegisterM& pg,
6780               const ZRegister& zn,
6781               const ZRegister& zm);
6782 
6783   // Unsigned saturating shift right narrow by immediate (bottom).
6784   void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6785 
6786   // Unsigned saturating shift right narrow by immediate (top).
6787   void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6788 
6789   // Unsigned saturating subtraction (predicated).
6790   void uqsub(const ZRegister& zd,
6791              const PRegisterM& pg,
6792              const ZRegister& zn,
6793              const ZRegister& zm);
6794 
6795   // Unsigned saturating subtraction reversed vectors (predicated).
6796   void uqsubr(const ZRegister& zd,
6797               const PRegisterM& pg,
6798               const ZRegister& zn,
6799               const ZRegister& zm);
6800 
6801   // Unsigned saturating extract narrow (bottom).
6802   void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6803 
6804   // Unsigned saturating extract narrow (top).
6805   void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6806 
6807   // Unsigned reciprocal estimate (predicated).
6808   void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6809 
6810   // Unsigned rounding halving addition.
6811   void urhadd(const ZRegister& zd,
6812               const PRegisterM& pg,
6813               const ZRegister& zn,
6814               const ZRegister& zm);
6815 
6816   // Unsigned rounding shift left by vector (predicated).
6817   void urshl(const ZRegister& zd,
6818              const PRegisterM& pg,
6819              const ZRegister& zn,
6820              const ZRegister& zm);
6821 
6822   // Unsigned rounding shift left reversed vectors (predicated).
6823   void urshlr(const ZRegister& zd,
6824               const PRegisterM& pg,
6825               const ZRegister& zn,
6826               const ZRegister& zm);
6827 
6828   // Unsigned rounding shift right by immediate.
6829   void urshr(const ZRegister& zd,
6830              const PRegisterM& pg,
6831              const ZRegister& zn,
6832              int shift);
6833 
6834   // Unsigned reciprocal square root estimate (predicated).
6835   void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6836 
6837   // Unsigned rounding shift right and accumulate (immediate).
6838   void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6839 
6840   // Unsigned shift left long by immediate (bottom).
6841   void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6842 
6843   // Unsigned shift left long by immediate (top).
6844   void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6845 
6846   // Unsigned saturating addition of signed value.
6847   void usqadd(const ZRegister& zd,
6848               const PRegisterM& pg,
6849               const ZRegister& zn,
6850               const ZRegister& zm);
6851 
6852   // Unsigned shift right and accumulate (immediate).
6853   void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6854 
6855   // Unsigned subtract long (bottom).
6856   void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6857 
6858   // Unsigned subtract long (top).
6859   void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6860 
6861   // Unsigned subtract wide (bottom).
6862   void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6863 
6864   // Unsigned subtract wide (top).
6865   void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6866 
  // Predicate-generating "while" loops (pd = predicate destination, rn/rm =
  // scalar bounds) followed by matrix multiply-accumulate and mixed-sign dot
  // products. NOTE(review): the *mmla/usdot/sudot group presumably requires
  // the int8/FP matrix-multiply CPU features — confirm in the implementation.
6867   // While decrementing signed scalar greater than or equal to scalar.
6868   void whilege(const PRegisterWithLaneSize& pd,
6869                const Register& rn,
6870                const Register& rm);
6871 
6872   // While decrementing signed scalar greater than scalar.
6873   void whilegt(const PRegisterWithLaneSize& pd,
6874                const Register& rn,
6875                const Register& rm);
6876 
6877   // While decrementing unsigned scalar higher than scalar.
6878   void whilehi(const PRegisterWithLaneSize& pd,
6879                const Register& rn,
6880                const Register& rm);
6881 
6882   // While decrementing unsigned scalar higher or same as scalar.
6883   void whilehs(const PRegisterWithLaneSize& pd,
6884                const Register& rn,
6885                const Register& rm);
6886 
6887   // While free of read-after-write conflicts.
6888   void whilerw(const PRegisterWithLaneSize& pd,
6889                const Register& rn,
6890                const Register& rm);
6891 
6892   // While free of write-after-read/write conflicts.
6893   void whilewr(const PRegisterWithLaneSize& pd,
6894                const Register& rn,
6895                const Register& rm);
6896 
6897   // Bitwise exclusive OR and rotate right by immediate.
6898   void xar(const ZRegister& zd,
6899            const ZRegister& zn,
6900            const ZRegister& zm,
6901            int shift);
6902 
6903   // Floating-point matrix multiply-accumulate.
6904   void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6905 
6906   // Signed integer matrix multiply-accumulate.
6907   void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6908 
6909   // Unsigned by signed integer matrix multiply-accumulate.
6910   void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6911 
6912   // Unsigned integer matrix multiply-accumulate.
6913   void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6914 
6915   // Unsigned by signed integer dot product.
6916   void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6917 
6918   // Unsigned by signed integer indexed dot product.
6919   void usdot(const ZRegister& zda,
6920              const ZRegister& zn,
6921              const ZRegister& zm,
6922              int index);
6923 
6924   // Signed by unsigned integer indexed dot product.
6925   void sudot(const ZRegister& zda,
6926              const ZRegister& zn,
6927              const ZRegister& zm,
6928              int index);
6929 
  // Tag-manipulation instructions (addg/gmi/irg/ldg/st*g/stz*g/subg/subp/
  // subps) — these appear to be Memory Tagging Extension (MTE) operations;
  // confirm the required CPU feature in the implementation.
6930   // Add with Tag.
6931   void addg(const Register& xd, const Register& xn, int offset, int tag_offset);
6932 
6933   // Tag Mask Insert.
6934   void gmi(const Register& xd, const Register& xn, const Register& xm);
6935 
6936   // Insert Random Tag.
6937   void irg(const Register& xd, const Register& xn, const Register& xm = xzr);
6938 
6939   // Load Allocation Tag.
6940   void ldg(const Register& xt, const MemOperand& addr);
6941 
  // Shared encoder for the store-allocation-tag forms below (stg, st2g, stzg,
  // stz2g); `op` selects the specific instruction encoding. NOTE(review):
  // confirm intent against the definition in the .cc file.
6942   void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op);
6943 
6944   // Store Allocation Tags.
6945   void st2g(const Register& xt, const MemOperand& addr);
6946 
6947   // Store Allocation Tag.
6948   void stg(const Register& xt, const MemOperand& addr);
6949 
6950   // Store Allocation Tag and Pair of registers.
6951   void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr);
6952 
6953   // Store Allocation Tags, Zeroing.
6954   void stz2g(const Register& xt, const MemOperand& addr);
6955 
6956   // Store Allocation Tag, Zeroing.
6957   void stzg(const Register& xt, const MemOperand& addr);
6958 
6959   // Subtract with Tag.
6960   void subg(const Register& xd, const Register& xn, int offset, int tag_offset);
6961 
6962   // Subtract Pointer.
6963   void subp(const Register& xd, const Register& xn, const Register& xm);
6964 
6965   // Subtract Pointer, setting Flags.
6966   void subps(const Register& xd, const Register& xn, const Register& xm);
6967 
6968   // Compare with Tag: a subps with the result discarded (xzr destination),
       // so only the condition flags are updated.
6969   void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); }
6970 
  // Memory copy (cpy*) and memory set (set*) instruction declarations.
  // NOTE(review): these appear to be the FEAT_MOPS operations, where each
  // logical copy/set is emitted as a prologue/main/epilogue triple — the
  // p/m/e letters in the mnemonics presumably select that stage; confirm
  // against the implementation. Note the parameter-order difference below:
  // cpy* take (rd, rs, rn) while set* take (rd, rn, rs).
6971   // Memory Copy.
6972   void cpye(const Register& rd, const Register& rs, const Register& rn);
6973 
6974   // Memory Copy, reads and writes non-temporal.
6975   void cpyen(const Register& rd, const Register& rs, const Register& rn);
6976 
6977   // Memory Copy, reads non-temporal.
6978   void cpyern(const Register& rd, const Register& rs, const Register& rn);
6979 
6980   // Memory Copy, writes non-temporal.
6981   void cpyewn(const Register& rd, const Register& rs, const Register& rn);
6982 
6983   // Memory Copy Forward-only.
6984   void cpyfe(const Register& rd, const Register& rs, const Register& rn);
6985 
6986   // Memory Copy Forward-only, reads and writes non-temporal.
6987   void cpyfen(const Register& rd, const Register& rs, const Register& rn);
6988 
6989   // Memory Copy Forward-only, reads non-temporal.
6990   void cpyfern(const Register& rd, const Register& rs, const Register& rn);
6991 
6992   // Memory Copy Forward-only, writes non-temporal.
6993   void cpyfewn(const Register& rd, const Register& rs, const Register& rn);
6994 
6995   // Memory Copy Forward-only.
6996   void cpyfm(const Register& rd, const Register& rs, const Register& rn);
6997 
6998   // Memory Copy Forward-only, reads and writes non-temporal.
6999   void cpyfmn(const Register& rd, const Register& rs, const Register& rn);
7000 
7001   // Memory Copy Forward-only, reads non-temporal.
7002   void cpyfmrn(const Register& rd, const Register& rs, const Register& rn);
7003 
7004   // Memory Copy Forward-only, writes non-temporal.
7005   void cpyfmwn(const Register& rd, const Register& rs, const Register& rn);
7006 
7007   // Memory Copy Forward-only.
7008   void cpyfp(const Register& rd, const Register& rs, const Register& rn);
7009 
7010   // Memory Copy Forward-only, reads and writes non-temporal.
7011   void cpyfpn(const Register& rd, const Register& rs, const Register& rn);
7012 
7013   // Memory Copy Forward-only, reads non-temporal.
7014   void cpyfprn(const Register& rd, const Register& rs, const Register& rn);
7015 
7016   // Memory Copy Forward-only, writes non-temporal.
7017   void cpyfpwn(const Register& rd, const Register& rs, const Register& rn);
7018 
7019   // Memory Copy.
7020   void cpym(const Register& rd, const Register& rs, const Register& rn);
7021 
7022   // Memory Copy, reads and writes non-temporal.
7023   void cpymn(const Register& rd, const Register& rs, const Register& rn);
7024 
7025   // Memory Copy, reads non-temporal.
7026   void cpymrn(const Register& rd, const Register& rs, const Register& rn);
7027 
7028   // Memory Copy, writes non-temporal.
7029   void cpymwn(const Register& rd, const Register& rs, const Register& rn);
7030 
7031   // Memory Copy.
7032   void cpyp(const Register& rd, const Register& rs, const Register& rn);
7033 
7034   // Memory Copy, reads and writes non-temporal.
7035   void cpypn(const Register& rd, const Register& rs, const Register& rn);
7036 
7037   // Memory Copy, reads non-temporal.
7038   void cpyprn(const Register& rd, const Register& rs, const Register& rn);
7039 
7040   // Memory Copy, writes non-temporal.
7041   void cpypwn(const Register& rd, const Register& rs, const Register& rn);
7042 
7043   // Memory Set.
7044   void sete(const Register& rd, const Register& rn, const Register& rs);
7045 
7046   // Memory Set, non-temporal.
7047   void seten(const Register& rd, const Register& rn, const Register& rs);
7048 
7049   // Memory Set with tag setting.
7050   void setge(const Register& rd, const Register& rn, const Register& rs);
7051 
7052   // Memory Set with tag setting, non-temporal.
7053   void setgen(const Register& rd, const Register& rn, const Register& rs);
7054 
7055   // Memory Set with tag setting.
7056   void setgm(const Register& rd, const Register& rn, const Register& rs);
7057 
7058   // Memory Set with tag setting, non-temporal.
7059   void setgmn(const Register& rd, const Register& rn, const Register& rs);
7060 
7061   // Memory Set with tag setting.
7062   void setgp(const Register& rd, const Register& rn, const Register& rs);
7063 
7064   // Memory Set with tag setting, non-temporal.
7065   void setgpn(const Register& rd, const Register& rn, const Register& rs);
7066 
7067   // Memory Set.
7068   void setm(const Register& rd, const Register& rn, const Register& rs);
7069 
7070   // Memory Set, non-temporal.
7071   void setmn(const Register& rd, const Register& rn, const Register& rs);
7072 
7073   // Memory Set.
7074   void setp(const Register& rd, const Register& rn, const Register& rs);
7075 
7076   // Memory Set, non-temporal.
7077   void setpn(const Register& rd, const Register& rn, const Register& rs);
7078 
7079   // Absolute value.
7080   void abs(const Register& rd, const Register& rn);
7081 
7082   // Count bits.
7083   void cnt(const Register& rd, const Register& rn);
7084 
7085   // Count Trailing Zeros.
7086   void ctz(const Register& rd, const Register& rn);
7087 
7088   // Signed Maximum.
7089   void smax(const Register& rd, const Register& rn, const Operand& op);
7090 
7091   // Signed Minimum.
7092   void smin(const Register& rd, const Register& rn, const Operand& op);
7093 
7094   // Unsigned Maximum.
7095   void umax(const Register& rd, const Register& rn, const Operand& op);
7096 
7097   // Unsigned Minimum.
7098   void umin(const Register& rd, const Register& rn, const Operand& op);
7099 
  // Emit generic instructions.

  // Emit a raw (pre-encoded) instruction into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream. sizeof(T) bytes are emitted.
  // Only legal while the assembler is allowed to emit (AllowAssembler()).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }

  // Code generation helpers.
  // NOTE(review): defined elsewhere; presumably emits a single-instruction
  // move of `imm` into `dst` and reports whether that was possible - confirm
  // against the definition.
  static bool OneInstrMoveImmediateHelper(Assembler* assm,
                                          const Register& dst,
                                          uint64_t imm);

  // Register encoding.

  // Encode the code of `rx` into instruction bits [hibit:lobit]. The stack
  // pointer's internal code is disallowed here; the dedicated *SP encoders
  // below must be used for it.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

// Expands to the field encoders Rd(), Rn(), Rm(), Ra(), Rt(), Rt2() and Rs(),
// each placing a register code into the correspondingly named field using the
// R*_offset/R*_width constants.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

  // As Rm(), but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }
7155 
  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  static Instr RmSP(Register rm) {
    VIXL_ASSERT(!rm.IsZero());
    return (rm.GetCode() & kRegCodeMask) << Rm_offset;
  }

  // SVE predicate register field encoders (Pd, Pm, Pn fields).
  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  // Encode a governing predicate into an arbitrary [hibit:lobit] field.
  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }
7199 
7200   // Flags encoding.
Flags(FlagsUpdate S)7201   static Instr Flags(FlagsUpdate S) {
7202     if (S == SetFlags) {
7203       return 1 << FlagsUpdate_offset;
7204     } else if (S == LeaveFlags) {
7205       return 0 << FlagsUpdate_offset;
7206     }
7207     VIXL_UNREACHABLE();
7208     return 0;
7209   }
7210 
Cond(Condition cond)7211   static Instr Cond(Condition cond) { return cond << Condition_offset; }
7212 
  // Generic immediate encoding.

  // Encode a signed immediate into bits [hibit:lobit], truncating it to the
  // field width. Asserts that the value fits in the field as a signed number.
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediate in satisfactory way.
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }

  // PC-relative address encoding. The 21-bit offset is split across the
  // immhi and immlo fields (as used by adr/adrp-style encodings).
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }
7241 
  // Branch encoding. All branch offsets here are signed and expressed in
  // the units expected by the corresponding field (truncated, then shifted
  // into place).
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  // Encode the bit position (0-63) tested by tbz/tbnz-style instructions.
  // The position is split into bit 5 and bits 4:0, stored in separate fields.
  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
    bit5 &= ImmTestBranchBit5_mask;
    bit40 &= ImmTestBranchBit40_mask;
    return bit5 | bit40;
  }

  // Data Processing encoding.

  // Operand-size flag: selects the 64-bit or 32-bit form of an instruction.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }

  // Add/subtract immediate: either a plain 12-bit value, or a 12-bit value
  // with the shift-by-12 bit set.
  static Instr ImmAddSub(int imm) {
    VIXL_ASSERT(IsImmAddSub(imm));
    if (IsUint12(imm)) {  // No shift required.
      imm <<= ImmAddSub_offset;
    } else {
      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
    }
    return imm;
  }

  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }
7305 
  // Encode the SVE load/store `dtype` field from the memory access size and
  // the element size (both log2, in bytes). The field placement defaults to
  // bits 24:23 (dtype_h) and 22:21 (dtype_l) but can be overridden.
  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

  // As SVEDtype(), but with the dtype halves split to bits 24:23 and 14:13.
  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }
7336 
  // Encode the 'imms' field of a bitfield or logical-immediate instruction.
  // X-sized operations accept 0-63; W-sized operations accept 0-31.
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }
7343 
ImmR(unsigned immr,unsigned reg_size)7344   static Instr ImmR(unsigned immr, unsigned reg_size) {
7345     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7346                 ((reg_size == kWRegSize) && IsUint5(immr)));
7347     USE(reg_size);
7348     VIXL_ASSERT(IsUint6(immr));
7349     return immr << ImmR_offset;
7350   }
7351 
  // Encode the 'imms' field of a logical-immediate instruction. For W-sized
  // operations the top encoding space (imms > 0x3c) is reserved, hence the
  // additional IsUint6(imms + 3) check.
  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  // Encode the 'immr' rotate field of a logical-immediate instruction.
  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  // Encode the 19-bit signed literal offset of a load-literal instruction.
  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  // Encode the 'N' bit of a logical-immediate instruction. It is only ever
  // set for X-sized operations.
  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }
7379 
ShiftDP(Shift shift)7380   static Instr ShiftDP(Shift shift) {
7381     VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
7382     return shift << ShiftDP_offset;
7383   }
7384 
ImmDPShift(unsigned amount)7385   static Instr ImmDPShift(unsigned amount) {
7386     VIXL_ASSERT(IsUint6(amount));
7387     return amount << ImmDPShift_offset;
7388   }
7389 
ExtendMode(Extend extend)7390   static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }
7391 
ImmExtendShift(unsigned left_shift)7392   static Instr ImmExtendShift(unsigned left_shift) {
7393     VIXL_ASSERT(left_shift <= 4);
7394     return left_shift << ImmExtendShift_offset;
7395   }
7396 
ImmCondCmp(unsigned imm)7397   static Instr ImmCondCmp(unsigned imm) {
7398     VIXL_ASSERT(IsUint5(imm));
7399     return imm << ImmCondCmp_offset;
7400   }
7401 
Nzcv(StatusFlags nzcv)7402   static Instr Nzcv(StatusFlags nzcv) {
7403     return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
7404   }
7405 
  // MemOperand offset encoding.

  // Unsigned scaled 12-bit offset.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // Signed unscaled 9-bit offset.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // Load/store-pair offset: a byte offset that must be a multiple of the
  // access size, stored as a scaled signed 7-bit value.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;
    VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes));
    int64_t scaled_imm7 = imm7 / access_size_in_bytes;
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // Single-bit flag selecting a scaled register offset.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // PAC load/store offset: a multiple of 8 bytes, scaled to a signed 10-bit
  // value whose sign bit is stored separately from the low nine bits.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }

  // Five-bit prefetch operation encoding.
  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }
7443 
  // 16-bit immediate for exception-generating instructions (svc, hvc, ...).
  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  // 16-bit immediate for the permanently-undefined instruction.
  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  // 16-bit system register specifier for mrs/msr.
  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  // Six-bit rotation for flag-manipulation (RMIF) instructions.
  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  // Seven-bit hint number for hint instructions.
  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  // System instruction CRm field.
  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  // System instruction CRn field.
  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  // Combined 14-bit system operation encoding.
  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  // Barrier (dmb/dsb) domain and type fields.
  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.

  // 16-bit payload of movz/movn/movk.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // Half-word selector (0-3) for move-wide instructions.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7519 
7520   // FP register type.
FPType(VRegister fd)7521   static Instr FPType(VRegister fd) {
7522     VIXL_ASSERT(fd.IsScalar());
7523     switch (fd.GetSizeInBits()) {
7524       case 16:
7525         return FP16;
7526       case 32:
7527         return FP32;
7528       case 64:
7529         return FP64;
7530       default:
7531         VIXL_UNREACHABLE();
7532         return 0;
7533     }
7534   }
7535 
FPScale(unsigned scale)7536   static Instr FPScale(unsigned scale) {
7537     VIXL_ASSERT(IsUint6(scale));
7538     return scale << FPScale_offset;
7539   }
7540 
  // Immediate field checking helpers. These test whether a value is
  // representable in the corresponding immediate field; definitions are in
  // the .cc file unless inlined below.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); }

  static bool IsImmFP32(uint32_t bits);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); }

  static bool IsImmFP64(uint64_t bits);
  // If `value` is encodable as a logical immediate of width `width`, returns
  // true and (when the out-pointers are non-null) writes the n/imm_s/imm_r
  // field values.
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7563 
  // Instruction bits for vector format in data processing operations.
  // Returns 0xffffffff for lane counts with no valid NEON arrangement.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
7593 
  // Instruction bits for vector format in floating point data processing
  // operations. Dispatches on lane count first, then total register size;
  // any combination without a valid FP arrangement is unreachable.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }
7644 
  // Instruction bits for vector format in load and store operations.
  // Returns 0xffffffff for lane counts with no valid arrangement.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  // Selected by element size; returns 0xffffffff for unsupported sizes.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }
7693 
  // SVE element-size field, derived from the lane size of a Z or P register.
  // Returns 0xffffffff for unsupported lane sizes.
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  // Five-bit predicate-constraint pattern (e.g. for ptrue).
  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }
7717 
ImmNEONHLM(int index,int num_bits)7718   static Instr ImmNEONHLM(int index, int num_bits) {
7719     int h, l, m;
7720     if (num_bits == 3) {
7721       VIXL_ASSERT(IsUint3(index));
7722       h = (index >> 2) & 1;
7723       l = (index >> 1) & 1;
7724       m = (index >> 0) & 1;
7725     } else if (num_bits == 2) {
7726       VIXL_ASSERT(IsUint2(index));
7727       h = (index >> 1) & 1;
7728       l = (index >> 0) & 1;
7729       m = 0;
7730     } else {
7731       VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
7732       h = (index >> 0) & 1;
7733       l = 0;
7734       m = 0;
7735     }
7736     return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
7737   }
7738 
ImmRotFcadd(int rot)7739   static Instr ImmRotFcadd(int rot) {
7740     VIXL_ASSERT(rot == 90 || rot == 270);
7741     return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7742   }
7743 
ImmRotFcmlaSca(int rot)7744   static Instr ImmRotFcmlaSca(int rot) {
7745     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7746     return (rot / 90) << ImmRotFcmlaSca_offset;
7747   }
7748 
ImmRotFcmlaVec(int rot)7749   static Instr ImmRotFcmlaVec(int rot) {
7750     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7751     return (rot / 90) << ImmRotFcmlaVec_offset;
7752   }
7753 
ImmNEONExt(int imm4)7754   static Instr ImmNEONExt(int imm4) {
7755     VIXL_ASSERT(IsUint4(imm4));
7756     return imm4 << ImmNEONExt_offset;
7757   }
7758 
  // Encode a NEON lane index into the imm5 field: the index is shifted past
  // a set bit that marks the lane size.
  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  // Encode a NEON lane index into the imm4 field, scaled by the lane size.
  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  // Split an 8-bit modified immediate into the a:b:c and d:e:f:g:h fields.
  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

  // Four-bit cmode field for NEON modified-immediate instructions.
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  // Single-bit op field for NEON modified-immediate instructions.
  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }
7790 
  // Size of the code generated since label to the current position.
  // The label must already be bound.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }

  // Deprecated buffer-query forwarders; call GetBuffer() directly instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  // Position-independent-code option this assembler was configured with.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }
7822 
GetCPUFeatures()7823   CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
7824 
SetCPUFeatures(const CPUFeatures & cpu_features)7825   void SetCPUFeatures(const CPUFeatures& cpu_features) {
7826     cpu_features_ = cpu_features;
7827   }
7828 
AllowPageOffsetDependentCode()7829   bool AllowPageOffsetDependentCode() const {
7830     return (GetPic() == PageOffsetDependentCode) ||
7831            (GetPic() == PositionDependentCode);
7832   }
7833 
AppropriateZeroRegFor(const CPURegister & reg)7834   static Register AppropriateZeroRegFor(const CPURegister& reg) {
7835     return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7836   }
7837 
 protected:
  // Shared emission helper for scalar load/store forms.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Load/store with pointer authentication (LoadStorePACOp forms).
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Shared emission helper for load/store-pair forms.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON structured load/store helpers (multi-struct, single-struct, and
  // all-lanes variants), plus a verification hook for the address form.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // Set `is_load` to false in default as it's only used in the
  // scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);

  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);
7897 
7898   // E.g. ldff1b, ldff1h, ...
7899   // This supports both contiguous and gather loads.
7900   void SVELdff1Helper(unsigned msize_in_bytes_log2,
7901                       const ZRegister& zt,
7902                       const PRegisterZ& pg,
7903                       const SVEMemOperand& addr,
7904                       bool is_signed);
7905 
7906   // Common code for the helpers above.
7907   void SVELdSt1Helper(unsigned msize_in_bytes_log2,
7908                       const ZRegister& zt,
7909                       const PRegister& pg,
7910                       const SVEMemOperand& addr,
7911                       bool is_signed,
7912                       Instr op);
7913 
7914   // Common code for the helpers above.
7915   void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
7916                               const ZRegister& zt,
7917                               const PRegister& pg,
7918                               const SVEMemOperand& addr,
7919                               bool is_load,
7920                               bool is_signed,
7921                               bool is_first_fault);
7922 
7923   // E.g. st2b, st3h, ...
7924   void SVESt234Helper(int num_regs,
7925                       const ZRegister& zt1,
7926                       const PRegister& pg,
7927                       const SVEMemOperand& addr);
7928 
7929   // E.g. ld2b, ld3h, ...
7930   void SVELd234Helper(int num_regs,
7931                       const ZRegister& zt1,
7932                       const PRegisterZ& pg,
7933                       const SVEMemOperand& addr);
7934 
7935   // Common code for the helpers above.
7936   void SVELdSt234Helper(int num_regs,
7937                         const ZRegister& zt1,
7938                         const PRegister& pg,
7939                         const SVEMemOperand& addr,
7940                         Instr op);
7941 
7942   // E.g. ld1qb, ld1qh, ldnt1b, ...
7943   void SVELd1St1ScaImmHelper(const ZRegister& zt,
7944                              const PRegister& pg,
7945                              const SVEMemOperand& addr,
7946                              Instr regoffset_op,
7947                              Instr immoffset_op,
7948                              int imm_divisor = 1);
7949 
7950   void SVELd1VecScaHelper(const ZRegister& zt,
7951                           const PRegister& pg,
7952                           const SVEMemOperand& addr,
7953                           uint32_t msize,
7954                           bool is_signed);
7955   void SVESt1VecScaHelper(const ZRegister& zt,
7956                           const PRegister& pg,
7957                           const SVEMemOperand& addr,
7958                           uint32_t msize);
7959 
7960   void Prefetch(PrefetchOperation op,
7961                 const MemOperand& addr,
7962                 LoadStoreScalingOption option = PreferScaledOffset);
7963   void Prefetch(int op,
7964                 const MemOperand& addr,
7965                 LoadStoreScalingOption option = PreferScaledOffset);
7966 
7967   // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
7968   // reports a bogus uninitialised warning then.
7969   void Logical(const Register& rd,
7970                const Register& rn,
7971                const Operand operand,
7972                LogicalOp op);
7973 
7974   void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);
7975 
7976   void LogicalImmediate(const Register& rd,
7977                         const Register& rn,
7978                         unsigned n,
7979                         unsigned imm_s,
7980                         unsigned imm_r,
7981                         LogicalOp op);
7982 
7983   void ConditionalCompare(const Register& rn,
7984                           const Operand& operand,
7985                           StatusFlags nzcv,
7986                           Condition cond,
7987                           ConditionalCompareOp op);
7988 
7989   void AddSubWithCarry(const Register& rd,
7990                        const Register& rn,
7991                        const Operand& operand,
7992                        FlagsUpdate S,
7993                        AddSubWithCarryOp op);
7994 
7995   void CompareVectors(const PRegisterWithLaneSize& pd,
7996                       const PRegisterZ& pg,
7997                       const ZRegister& zn,
7998                       const ZRegister& zm,
7999                       SVEIntCompareVectorsOp op);
8000 
8001   void CompareVectors(const PRegisterWithLaneSize& pd,
8002                       const PRegisterZ& pg,
8003                       const ZRegister& zn,
8004                       int imm,
8005                       SVEIntCompareSignedImmOp op);
8006 
8007   void CompareVectors(const PRegisterWithLaneSize& pd,
8008                       const PRegisterZ& pg,
8009                       const ZRegister& zn,
8010                       unsigned imm,
8011                       SVEIntCompareUnsignedImmOp op);
8012 
8013   void SVEIntAddSubtractImmUnpredicatedHelper(
8014       SVEIntAddSubtractImm_UnpredicatedOp op,
8015       const ZRegister& zd,
8016       int imm8,
8017       int shift);
8018 
8019   void SVEElementCountToRegisterHelper(Instr op,
8020                                        const Register& rd,
8021                                        int pattern,
8022                                        int multiplier);
8023 
8024   Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);
8025 
8026   Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);
8027 
8028   void SVEBitwiseShiftImmediate(const ZRegister& zd,
8029                                 const ZRegister& zn,
8030                                 Instr encoded_imm,
8031                                 Instr op);
8032 
8033   void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
8034                                     const PRegisterM& pg,
8035                                     Instr encoded_imm,
8036                                     Instr op);
8037 
8038   Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
8039                           const ZRegister& zm,
8040                           int index,
8041                           Instr op_h,
8042                           Instr op_s,
8043                           Instr op_d);
8044 
8045   Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);
8046 
8047   Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);
8048 
8049   void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
8050                                                    const PRegister& pg,
8051                                                    const SVEMemOperand& addr,
8052                                                    int prefetch_size);
8053 
8054   void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
8055                                                    const PRegister& pg,
8056                                                    const SVEMemOperand& addr,
8057                                                    int prefetch_size);
8058 
8059   void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
8060                                                   const PRegister& pg,
8061                                                   const SVEMemOperand& addr,
8062                                                   int prefetch_size);
8063 
8064   void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
8065                                                   const PRegister& pg,
8066                                                   const SVEMemOperand& addr,
8067                                                   int prefetch_size);
8068 
8069   void SVEPrefetchHelper(PrefetchOperation prfop,
8070                          const PRegister& pg,
8071                          const SVEMemOperand& addr,
8072                          int prefetch_size);
8073 
SVEImmPrefetchOperation(PrefetchOperation prfop)8074   static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
8075     // SVE only supports PLD and PST, not PLI.
8076     VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
8077                 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
8078     // Check that we can simply map bits.
8079     VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
8080     VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
8081     // Remaining operations map directly.
8082     return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
8083   }
8084 
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Common implementation for the add/subtract family; `S` selects whether
  // flags are updated.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // Helper for the NEON table-lookup forms.
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Convenience pass-through for CPU feature checks; forwards to
  // cpu_features_.Has().
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  bool CPUHas(SystemRegister sysreg) const;
8141 
 private:
  // Encode floating-point immediates into the 8-bit `imm8` instruction field.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, one per instruction shape.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  // Encode the addressing-mode field of a NEON structure load/store.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8310 
  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    // An instruction word must be exactly kInstructionSize bytes.
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  // Position-independence requirement this assembler was configured with;
  // read via GetPic().
  PositionIndependentCodeOption pic_;

  // CPU feature set consulted by the CPUHas() checks.
  CPUFeatures cpu_features_;
};
8322 
8323 
8324 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8325 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8326   return UpdateValue(new_value,
8327                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8328 }
8329 
8330 
8331 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8332 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8333   return UpdateValue(high64,
8334                      low64,
8335                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8336 }
8337 
8338 
8339 }  // namespace aarch64
8340 
// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file.  Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
// Elements of this set are raw ptrdiff_t offsets, so each element acts as its
// own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
8360 
8361 }  // namespace vixl
8362 
8363 #endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8364