• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29 
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 
37 #include "operands-aarch64.h"
38 
39 namespace vixl {
40 namespace aarch64 {
41 
42 class LabelTestHelper;  // Forward declaration.
43 
44 
// A Label represents a position in the generated code: either already bound
// (it has a fixed offset in the code buffer) or still unbound, in which case
// it tracks the offsets of the forward branches that refer to it. Binding and
// linking are performed by the Assembler (a friend class).
class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  // A label is bound once it has been given a fixed location in the buffer.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while at least one instruction referring to it is still
  // awaiting resolution.
  bool IsLinked() const { return !links_.empty(); }

  // Offset of the bound label from the start of the code buffer.
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Tuning parameters for the InvalSet used to track links.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  // Fix the label's location. Called by the Assembler when the label is bound.
  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  // Record the offset of an instruction that refers to this (unbound) label.
  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
160 
161 
162 class Assembler;
163 class LiteralPool;
164 
165 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
166 // stream and loaded through a pc relative load. The same literal can be
167 // referred to by multiple instructions but a literal can only reside at one
168 // place in memory. A literal can be used by a load before or after being
169 // placed in memory.
170 //
171 // Internally an offset of 0 is associated with a literal which has been
172 // neither used nor placed. Then two possibilities arise:
173 //  1) the label is placed, the offset (stored as offset + 1) is used to
174 //     resolve any subsequent load using the label.
175 //  2) the label is not placed and offset is the offset of the last load using
176 //     the literal (stored as -offset -1). If multiple loads refer to this
177 //     literal then the last load holds the offset of the preceding load and
178 //     all loads form a chain. Once the offset is placed all the loads in the
179 //     chain are resolved and future loads fall back to possibility 1.
class RawLiteral {
 public:
  // Controls who is responsible for deleting the literal object.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  // Size of the literal in bytes. Only W- (32-bit), X- (64-bit) and Q-sized
  // (128-bit) literals are supported.
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  // Low 64 bits of a Q-sized (128-bit) literal.
  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  // High 64 bits of a Q-sized (128-bit) literal.
  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // See the comment above the class: offset_ == 0 means neither used nor
  // placed, a negative value encodes the last use, a positive one the
  // placement offset.
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Offset from the start of the buffer at which the literal was placed.
  // Only valid once IsPlaced() is true.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    // Stored biased by one so that 0 can mean "not placed".
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Record the placement offset (stored as offset + 1; see class comment).
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Offset of the most recent load referring to this (unplaced) literal.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record the offset of the last load (stored as -offset - 1; see class
  // comment).
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};
290 
291 
292 template <typename T>
293 class Literal : public RawLiteral {
294  public:
295   explicit Literal(T value,
296                    LiteralPool* literal_pool = NULL,
297                    RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(sizeof (value),literal_pool,ownership)298       : RawLiteral(sizeof(value), literal_pool, ownership) {
299     VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
300     UpdateValue(value);
301   }
302 
303   Literal(T high64,
304           T low64,
305           LiteralPool* literal_pool = NULL,
306           RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
RawLiteral(kQRegSizeInBytes,literal_pool,ownership)307       : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
308     VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
309     UpdateValue(high64, low64);
310   }
311 
~Literal()312   virtual ~Literal() {}
313 
314   // Update the value of this literal, if necessary by rewriting the value in
315   // the pool.
316   // If the literal has already been placed in a literal pool, the address of
317   // the start of the code buffer must be provided, as the literal only knows it
318   // offset from there. This also allows patching the value after the code has
319   // been moved in memory.
320   void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
321     VIXL_ASSERT(sizeof(new_value) == size_);
322     memcpy(&low64_, &new_value, sizeof(new_value));
323     if (IsPlaced()) {
324       VIXL_ASSERT(code_buffer != NULL);
325       RewriteValueInCode(code_buffer);
326     }
327   }
328 
329   void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
330     VIXL_ASSERT(sizeof(low64) == size_ / 2);
331     memcpy(&low64_, &low64, sizeof(low64));
332     memcpy(&high64_, &high64, sizeof(high64));
333     if (IsPlaced()) {
334       VIXL_ASSERT(code_buffer != NULL);
335       RewriteValueInCode(code_buffer);
336     }
337   }
338 
339   void UpdateValue(T new_value, const Assembler* assembler);
340   void UpdateValue(T high64, T low64, const Assembler* assembler);
341 
342  private:
RewriteValueInCode(uint8_t * code_buffer)343   void RewriteValueInCode(uint8_t* code_buffer) {
344     VIXL_ASSERT(IsPlaced());
345     VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
346     switch (GetSize()) {
347       case kSRegSizeInBytes:
348         *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
349             GetRawValue32();
350         break;
351       case kDRegSizeInBytes:
352         *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
353             GetRawValue64();
354         break;
355       default:
356         VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
357         uint64_t* base_address =
358             reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
359         *base_address = GetRawValue128Low64();
360         *(base_address + 1) = GetRawValue128High64();
361     }
362   }
363 };
364 
365 
// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
384 
385 
// Control how scaled- and unscaled-offset loads and stores are generated.
// The "Prefer" options fall back to another addressing form when the
// requested immediate cannot be encoded; the "Require" options do not.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
402 
403 
404 // Assembler.
405 class Assembler : public vixl::internal::AssemblerBase {
406  public:
  // Construct an Assembler with an empty, growable code buffer.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler that allocates an internal code buffer of
  // `capacity` bytes.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
  // Construct an Assembler over a caller-owned buffer of `capacity` bytes.
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}

  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}
428 
429   // System functions.
430 
431   // Start generating code from the beginning of the buffer, discarding any code
432   // and data that has already been emitted into the buffer.
433   void Reset();
434 
435   // Bind a label to the current PC.
436   void bind(Label* label);
437 
438   // Bind a label to a specified offset from the start of the buffer.
439   void BindToOffset(Label* label, ptrdiff_t offset);
440 
441   // Place a literal at the current PC.
442   void place(RawLiteral* literal);
443 
444   VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
445     return GetCursorOffset();
446   }
447 
448   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
449                   ptrdiff_t GetBufferEndOffset() const) {
450     return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
451   }
452   VIXL_DEPRECATED("GetBuffer().GetCapacity()",
453                   ptrdiff_t BufferEndOffset() const) {
454     return GetBuffer().GetCapacity();
455   }
456 
457   // Return the address of a bound label.
458   template <typename T>
GetLabelAddress(const Label * label)459   T GetLabelAddress(const Label* label) const {
460     VIXL_ASSERT(label->IsBound());
461     VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
462     return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
463   }
464 
GetInstructionAt(ptrdiff_t instruction_offset)465   Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
466     return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
467   }
468   VIXL_DEPRECATED("GetInstructionAt",
469                   Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
470     return GetInstructionAt(instruction_offset);
471   }
472 
GetInstructionOffset(Instruction * instruction)473   ptrdiff_t GetInstructionOffset(Instruction* instruction) {
474     VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
475     ptrdiff_t offset =
476         instruction - GetBuffer()->GetStartAddress<Instruction*>();
477     VIXL_ASSERT((0 <= offset) &&
478                 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
479     return offset;
480   }
481   VIXL_DEPRECATED("GetInstructionOffset",
482                   ptrdiff_t InstructionOffset(Instruction* instruction)) {
483     return GetInstructionOffset(instruction);
484   }
485 
486   // Instruction set functions.
487 
488   // Branch / Jump instructions.
489 
490   // Branch to register.
491   void br(const Register& xn);
492 
493   // Branch with link to register.
494   void blr(const Register& xn);
495 
496   // Branch to register with return hint.
497   void ret(const Register& xn = lr);
498 
499   // Branch to register, with pointer authentication. Using key A and a modifier
500   // of zero [Armv8.3].
501   void braaz(const Register& xn);
502 
503   // Branch to register, with pointer authentication. Using key B and a modifier
504   // of zero [Armv8.3].
505   void brabz(const Register& xn);
506 
507   // Branch with link to register, with pointer authentication. Using key A and
508   // a modifier of zero [Armv8.3].
509   void blraaz(const Register& xn);
510 
511   // Branch with link to register, with pointer authentication. Using key B and
512   // a modifier of zero [Armv8.3].
513   void blrabz(const Register& xn);
514 
515   // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
516   void retaa();
517 
518   // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
519   void retab();
520 
521   // Branch to register, with pointer authentication. Using key A [Armv8.3].
522   void braa(const Register& xn, const Register& xm);
523 
524   // Branch to register, with pointer authentication. Using key B [Armv8.3].
525   void brab(const Register& xn, const Register& xm);
526 
527   // Branch with link to register, with pointer authentication. Using key A
528   // [Armv8.3].
529   void blraa(const Register& xn, const Register& xm);
530 
531   // Branch with link to register, with pointer authentication. Using key B
532   // [Armv8.3].
533   void blrab(const Register& xn, const Register& xm);
534 
535   // Unconditional branch to label.
536   void b(Label* label);
537 
538   // Conditional branch to label.
539   void b(Label* label, Condition cond);
540 
541   // Unconditional branch to PC offset.
542   void b(int64_t imm26);
543 
544   // Conditional branch to PC offset.
545   void b(int64_t imm19, Condition cond);
546 
547   // Branch with link to label.
548   void bl(Label* label);
549 
550   // Branch with link to PC offset.
551   void bl(int64_t imm26);
552 
553   // Compare and branch to label if zero.
554   void cbz(const Register& rt, Label* label);
555 
556   // Compare and branch to PC offset if zero.
557   void cbz(const Register& rt, int64_t imm19);
558 
559   // Compare and branch to label if not zero.
560   void cbnz(const Register& rt, Label* label);
561 
562   // Compare and branch to PC offset if not zero.
563   void cbnz(const Register& rt, int64_t imm19);
564 
565   // Table lookup from one register.
566   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
567 
568   // Table lookup from two registers.
569   void tbl(const VRegister& vd,
570            const VRegister& vn,
571            const VRegister& vn2,
572            const VRegister& vm);
573 
574   // Table lookup from three registers.
575   void tbl(const VRegister& vd,
576            const VRegister& vn,
577            const VRegister& vn2,
578            const VRegister& vn3,
579            const VRegister& vm);
580 
581   // Table lookup from four registers.
582   void tbl(const VRegister& vd,
583            const VRegister& vn,
584            const VRegister& vn2,
585            const VRegister& vn3,
586            const VRegister& vn4,
587            const VRegister& vm);
588 
589   // Table lookup extension from one register.
590   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
591 
592   // Table lookup extension from two registers.
593   void tbx(const VRegister& vd,
594            const VRegister& vn,
595            const VRegister& vn2,
596            const VRegister& vm);
597 
598   // Table lookup extension from three registers.
599   void tbx(const VRegister& vd,
600            const VRegister& vn,
601            const VRegister& vn2,
602            const VRegister& vn3,
603            const VRegister& vm);
604 
605   // Table lookup extension from four registers.
606   void tbx(const VRegister& vd,
607            const VRegister& vn,
608            const VRegister& vn2,
609            const VRegister& vn3,
610            const VRegister& vn4,
611            const VRegister& vm);
612 
613   // Test bit and branch to label if zero.
614   void tbz(const Register& rt, unsigned bit_pos, Label* label);
615 
616   // Test bit and branch to PC offset if zero.
617   void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
618 
619   // Test bit and branch to label if not zero.
620   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
621 
622   // Test bit and branch to PC offset if not zero.
623   void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
624 
625   // Address calculation instructions.
626   // Calculate a PC-relative address. Unlike for branches the offset in adr is
627   // unscaled (i.e. the result can be unaligned).
628 
629   // Calculate the address of a label.
630   void adr(const Register& xd, Label* label);
631 
632   // Calculate the address of a PC offset.
633   void adr(const Register& xd, int64_t imm21);
634 
635   // Calculate the page address of a label.
636   void adrp(const Register& xd, Label* label);
637 
638   // Calculate the page address of a PC offset.
639   void adrp(const Register& xd, int64_t imm21);
640 
641   // Data Processing instructions.
642 
643   // Add.
644   void add(const Register& rd, const Register& rn, const Operand& operand);
645 
646   // Add and update status flags.
647   void adds(const Register& rd, const Register& rn, const Operand& operand);
648 
649   // Compare negative.
650   void cmn(const Register& rn, const Operand& operand);
651 
652   // Subtract.
653   void sub(const Register& rd, const Register& rn, const Operand& operand);
654 
655   // Subtract and update status flags.
656   void subs(const Register& rd, const Register& rn, const Operand& operand);
657 
658   // Compare.
659   void cmp(const Register& rn, const Operand& operand);
660 
661   // Negate.
662   void neg(const Register& rd, const Operand& operand);
663 
664   // Negate and update status flags.
665   void negs(const Register& rd, const Operand& operand);
666 
667   // Add with carry bit.
668   void adc(const Register& rd, const Register& rn, const Operand& operand);
669 
670   // Add with carry bit and update status flags.
671   void adcs(const Register& rd, const Register& rn, const Operand& operand);
672 
673   // Subtract with carry bit.
674   void sbc(const Register& rd, const Register& rn, const Operand& operand);
675 
676   // Subtract with carry bit and update status flags.
677   void sbcs(const Register& rd, const Register& rn, const Operand& operand);
678 
679   // Rotate register right and insert into NZCV flags under the control of a
680   // mask [Armv8.4].
681   void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
682 
683   // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
684   void setf8(const Register& rn);
685 
686   // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
687   void setf16(const Register& rn);
688 
689   // Negate with carry bit.
690   void ngc(const Register& rd, const Operand& operand);
691 
692   // Negate with carry bit and update status flags.
693   void ngcs(const Register& rd, const Operand& operand);
694 
695   // Logical instructions.
696 
697   // Bitwise and (A & B).
698   void and_(const Register& rd, const Register& rn, const Operand& operand);
699 
700   // Bitwise and (A & B) and update status flags.
701   void ands(const Register& rd, const Register& rn, const Operand& operand);
702 
703   // Bit test and set flags.
704   void tst(const Register& rn, const Operand& operand);
705 
706   // Bit clear (A & ~B).
707   void bic(const Register& rd, const Register& rn, const Operand& operand);
708 
709   // Bit clear (A & ~B) and update status flags.
710   void bics(const Register& rd, const Register& rn, const Operand& operand);
711 
712   // Bitwise or (A | B).
713   void orr(const Register& rd, const Register& rn, const Operand& operand);
714 
715   // Bitwise nor (A | ~B).
716   void orn(const Register& rd, const Register& rn, const Operand& operand);
717 
718   // Bitwise eor/xor (A ^ B).
719   void eor(const Register& rd, const Register& rn, const Operand& operand);
720 
721   // Bitwise enor/xnor (A ^ ~B).
722   void eon(const Register& rd, const Register& rn, const Operand& operand);
723 
724   // Logical shift left by variable.
725   void lslv(const Register& rd, const Register& rn, const Register& rm);
726 
727   // Logical shift right by variable.
728   void lsrv(const Register& rd, const Register& rn, const Register& rm);
729 
730   // Arithmetic shift right by variable.
731   void asrv(const Register& rd, const Register& rn, const Register& rm);
732 
733   // Rotate right by variable.
734   void rorv(const Register& rd, const Register& rn, const Register& rm);
735 
736   // Bitfield instructions.
737 
738   // Bitfield move.
739   void bfm(const Register& rd,
740            const Register& rn,
741            unsigned immr,
742            unsigned imms);
743 
744   // Signed bitfield move.
745   void sbfm(const Register& rd,
746             const Register& rn,
747             unsigned immr,
748             unsigned imms);
749 
750   // Unsigned bitfield move.
751   void ubfm(const Register& rd,
752             const Register& rn,
753             unsigned immr,
754             unsigned imms);
755 
756   // Bfm aliases.
757 
758   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)759   void bfi(const Register& rd,
760            const Register& rn,
761            unsigned lsb,
762            unsigned width) {
763     VIXL_ASSERT(width >= 1);
764     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
765     bfm(rd,
766         rn,
767         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
768         width - 1);
769   }
770 
771   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)772   void bfxil(const Register& rd,
773              const Register& rn,
774              unsigned lsb,
775              unsigned width) {
776     VIXL_ASSERT(width >= 1);
777     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
778     bfm(rd, rn, lsb, lsb + width - 1);
779   }
780 
  // Bitfield clear [Armv8.2]: clear `width` bits of rd starting at `lsb` by
  // inserting from the appropriate zero register.
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }
785 
786   // Sbfm aliases.
787 
788   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)789   void asr(const Register& rd, const Register& rn, unsigned shift) {
790     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
791     sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
792   }
793 
794   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)795   void sbfiz(const Register& rd,
796              const Register& rn,
797              unsigned lsb,
798              unsigned width) {
799     VIXL_ASSERT(width >= 1);
800     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
801     sbfm(rd,
802          rn,
803          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
804          width - 1);
805   }
806 
807   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)808   void sbfx(const Register& rd,
809             const Register& rn,
810             unsigned lsb,
811             unsigned width) {
812     VIXL_ASSERT(width >= 1);
813     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
814     sbfm(rd, rn, lsb, lsb + width - 1);
815   }
816 
817   // Signed extend byte.
sxtb(const Register & rd,const Register & rn)818   void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
819 
820   // Signed extend halfword.
sxth(const Register & rd,const Register & rn)821   void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
822 
823   // Signed extend word.
sxtw(const Register & rd,const Register & rn)824   void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
825 
826   // Ubfm aliases.
827 
828   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)829   void lsl(const Register& rd, const Register& rn, unsigned shift) {
830     unsigned reg_size = rd.GetSizeInBits();
831     VIXL_ASSERT(shift < reg_size);
832     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
833   }
834 
835   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)836   void lsr(const Register& rd, const Register& rn, unsigned shift) {
837     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
838     ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
839   }
840 
841   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)842   void ubfiz(const Register& rd,
843              const Register& rn,
844              unsigned lsb,
845              unsigned width) {
846     VIXL_ASSERT(width >= 1);
847     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
848     ubfm(rd,
849          rn,
850          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
851          width - 1);
852   }
853 
854   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)855   void ubfx(const Register& rd,
856             const Register& rn,
857             unsigned lsb,
858             unsigned width) {
859     VIXL_ASSERT(width >= 1);
860     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
861     ubfm(rd, rn, lsb, lsb + width - 1);
862   }
863 
864   // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)865   void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
866 
867   // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)868   void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
869 
870   // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)871   void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
872 
  // Extract: rd receives a register-sized field taken from the concatenation
  // rn:rm, starting at bit position lsb of rm.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);
917 
918   // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)919   void ror(const Register& rd, const Register& rs, unsigned shift) {
920     extr(rd, rs, rs, shift);
921   }
922 
  // Conditional comparison.

  // Conditional compare negative: if cond passes, set flags from
  // (rn + operand), otherwise set the flags to nzcv.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare: if cond passes, set flags from (rn - operand),
  // otherwise set the flags to nzcv.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);
996 
997   // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & xd,const Register & wn,const Register & wm)998   void umull(const Register& xd, const Register& wn, const Register& wm) {
999     umaddl(xd, wn, wm, xzr);
1000   }
1001 
  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);
1031 
1032   // Reverse bytes in 64-bit general purpose register, an alias for rev
1033   // [Armv8.2].
rev64(const Register & xd,const Register & xn)1034   void rev64(const Register& xd, const Register& xn) {
1035     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1036     rev(xd, xn);
1037   }
1038 
  // Reverse bytes in the whole register.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);
1047 
1048   // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1049   void pacia(const Register& xd, const Register& rn);
1050 
  // Pointer Authentication Code for Instruction address, using key A and a
  // modifier of zero [Armv8.3].
  void paciza(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacia1716();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and modifier in SP [Armv8.3].
  void paciasp();

  // Pointer Authentication Code for Instruction address, using key A, with
  // address in LR and a modifier of zero [Armv8.3].
  void paciaz();

  // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
  void pacib(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Instruction address, using key B and a
  // modifier of zero [Armv8.3].
  void pacizb(const Register& xd);

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacib1716();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and modifier in SP [Armv8.3].
  void pacibsp();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and a modifier of zero [Armv8.3].
  void pacibz();

  // Pointer Authentication Code for Data address, using key A [Armv8.3].
  void pacda(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key A and a modifier of
  // zero [Armv8.3].
  void pacdza(const Register& xd);

  // Pointer Authentication Code for Data address, using key B [Armv8.3].
  void pacdb(const Register& xd, const Register& xn);

  // Pointer Authentication Code for Data address, using key B and a modifier of
  // zero [Armv8.3].
  void pacdzb(const Register& xd);

  // Pointer Authentication Code, using Generic key: computes a generic PAC for
  // xn with modifier xm, written to xd [Armv8.3].
  void pacga(const Register& xd, const Register& xn, const Register& xm);

  // Authenticate Instruction address, using key A [Armv8.3].
  void autia(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key A and a modifier of zero
  // [Armv8.3].
  void autiza(const Register& xd);

  // Authenticate Instruction address, using key A, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autia1716();

  // Authenticate Instruction address, using key A, with address in LR and
  // modifier in SP [Armv8.3].
  void autiasp();

  // Authenticate Instruction address, using key A, with address in LR and a
  // modifier of zero [Armv8.3].
  void autiaz();

  // Authenticate Instruction address, using key B [Armv8.3].
  void autib(const Register& xd, const Register& xn);

  // Authenticate Instruction address, using key B and a modifier of zero
  // [Armv8.3].
  void autizb(const Register& xd);

  // Authenticate Instruction address, using key B, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autib1716();

  // Authenticate Instruction address, using key B, with address in LR and
  // modifier in SP [Armv8.3].
  void autibsp();

  // Authenticate Instruction address, using key B, with address in LR and a
  // modifier of zero [Armv8.3].
  void autibz();

  // Authenticate Data address, using key A [Armv8.3].
  void autda(const Register& xd, const Register& xn);

  // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
  void autdza(const Register& xd);

  // Authenticate Data address, using key B [Armv8.3].
  void autdb(const Register& xd, const Register& xn);

  // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
  void autdzb(const Register& xd);

  // Strip Pointer Authentication Code of Data address [Armv8.3].
  void xpacd(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address [Armv8.3].
  void xpaci(const Register& xd);

  // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
  void xpaclri();
1161 
  // Memory instructions.

  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);
1213 
1214   // Store integer or FP register (with unscaled offset).
1215   void stur(const CPURegister& rt,
1216             const MemOperand& src,
1217             LoadStoreScalingOption option = PreferUnscaledOffset);
1218 
  // Load word with sign extension (with unscaled offset).
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load double-word with pointer authentication, using data key A and a
  // modifier of zero [Armv8.3].
  void ldraa(const Register& xt, const MemOperand& src);

  // Load double-word with pointer authentication, using data key B and a
  // modifier of zero [Armv8.3].
  void ldrab(const Register& xt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);
1296 
  // Store exclusive byte; rs receives the status result (0 on success).
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word; rs receives the status result (0 on success).
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register; rs receives the status result (0 on success).
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Store LORelease byte [Armv8.1].
  void stllrb(const Register& rt, const MemOperand& dst);

  // Store LORelease half-word [Armv8.1].
  void stllrh(const Register& rt, const MemOperand& dst);

  // Store LORelease register [Armv8.1].
  void stllr(const Register& rt, const MemOperand& dst);

  // Load LOAcquire byte [Armv8.1].
  void ldlarb(const Register& rt, const MemOperand& src);

  // Load LOAcquire half-word [Armv8.1].
  void ldlarh(const Register& rt, const MemOperand& src);

  // Load LOAcquire register [Armv8.1].
  void ldlar(const Register& rt, const MemOperand& src);
1386 
  // Compare and Swap word or doubleword in memory [Armv8.1].
  void cas(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1].
  void casa(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Store-release semantics
  // [Armv8.1].
  void casl(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1].
  void casal(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory [Armv8.1].
  void casb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1].
  void casab(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Store-release semantics [Armv8.1].
  void caslb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory [Armv8.1].
  void cash(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire semantics [Armv8.1].
  void casah(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Store-release semantics
  // [Armv8.1].
  void caslh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
  void casp(const Register& rs,
            const Register& rs2,
            const Register& rt,
            const Register& rt2,
            const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
  // semantics [Armv8.1].
  void caspa(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with Store-release
  // semantics [Armv8.1].
  void caspl(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
  // and Store-release semantics [Armv8.1].
  void caspal(const Register& rs,
              const Register& rs2,
              const Register& rt,
              const Register& rt2,
              const MemOperand& src);
1450 
  // Store-release byte (with unscaled offset) [Armv8.4].
  void stlurb(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
  void ldapurb(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
  void ldapursb(const Register& rt, const MemOperand& src);

  // Store-release half-word (with unscaled offset) [Armv8.4].
  void stlurh(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
  void ldapurh(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed half-word (with unscaled offset)
  // [Armv8.4].
  void ldapursh(const Register& rt, const MemOperand& src);

  // Store-release word or double-word (with unscaled offset) [Armv8.4].
  void stlur(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register word or double-word (with unscaled offset)
  // [Armv8.4].
  void ldapur(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
  void ldapursw(const Register& xt, const MemOperand& src);
1479 
1480   // Atomic add on byte in memory [Armv8.1]
1481   void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
1482 
1483   // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
1484   void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
1485 
1486   // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
1487   void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
1488 
1489   // Atomic add on byte in memory, with Load-acquire and Store-release semantics
1490   // [Armv8.1]
1491   void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
1492 
1493   // Atomic add on halfword in memory [Armv8.1]
1494   void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
1495 
1496   // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
1497   void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
1498 
1499   // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
1500   void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
1501 
1502   // Atomic add on halfword in memory, with Load-acquire and Store-release
1503   // semantics [Armv8.1]
1504   void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
1505 
1506   // Atomic add on word or doubleword in memory [Armv8.1]
1507   void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
1508 
1509   // Atomic add on word or doubleword in memory, with Load-acquire semantics
1510   // [Armv8.1]
1511   void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
1512 
1513   // Atomic add on word or doubleword in memory, with Store-release semantics
1514   // [Armv8.1]
1515   void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
1516 
1517   // Atomic add on word or doubleword in memory, with Load-acquire and
1518   // Store-release semantics [Armv8.1]
1519   void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
1520 
1521   // Atomic bit clear on byte in memory [Armv8.1]
1522   void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
1523 
1524   // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
1525   void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
1526 
1527   // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
1528   void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
1529 
1530   // Atomic bit clear on byte in memory, with Load-acquire and Store-release
1531   // semantics [Armv8.1]
1532   void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
1533 
1534   // Atomic bit clear on halfword in memory [Armv8.1]
1535   void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
1536 
1537   // Atomic bit clear on halfword in memory, with Load-acquire semantics
1538   // [Armv8.1]
1539   void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
1540 
1541   // Atomic bit clear on halfword in memory, with Store-release semantics
1542   // [Armv8.1]
1543   void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
1544 
  // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory [Armv8.1]
  void ldclr(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldclra(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldclral(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory [Armv8.1]
  void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory [Armv8.1]
  void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
  void ldeor(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldeora(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1608 
  // Atomic bit set on byte in memory [Armv8.1]
  void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
  void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory [Armv8.1]
  void ldseth(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
  void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory [Armv8.1]
  void ldset(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldseta(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory [Armv8.1]
  void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory [Armv8.1]
  void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory [Armv8.1]
  void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1695 
  // Atomic signed minimum on byte in memory [Armv8.1]
  void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory [Armv8.1]
  void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory [Armv8.1]
  void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory [Armv8.1]
  void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory [Armv8.1]
  void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
  void ldumax(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1785 
  // Atomic unsigned minimum on byte in memory [Armv8.1]
  void lduminb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void lduminab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Store-release semantics
  // [Armv8.1]
  void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory [Armv8.1]
  void lduminh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void lduminah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
  void ldumin(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldumina(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void lduminl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
  // and Store-release semantics [Armv8.1]
  void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1830 
  // Atomic add on byte in memory, without return. [Armv8.1]
  void staddb(const Register& rs, const MemOperand& src);

  // Atomic add on byte in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void staddlb(const Register& rs, const MemOperand& src);

  // Atomic add on halfword in memory, without return. [Armv8.1]
  void staddh(const Register& rs, const MemOperand& src);

  // Atomic add on halfword in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void staddlh(const Register& rs, const MemOperand& src);

  // Atomic add on word or doubleword in memory, without return. [Armv8.1]
  void stadd(const Register& rs, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void staddl(const Register& rs, const MemOperand& src);

  // Atomic bit clear on byte in memory, without return. [Armv8.1]
  void stclrb(const Register& rs, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stclrlb(const Register& rs, const MemOperand& src);

  // Atomic bit clear on halfword in memory, without return. [Armv8.1]
  void stclrh(const Register& rs, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stclrlh(const Register& rs, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
  void stclr(const Register& rs, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stclrl(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
  void steorb(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void steorlb(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
  void steorh(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void steorlh(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, without return.
  // [Armv8.1]
  void steor(const Register& rs, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void steorl(const Register& rs, const MemOperand& src);

  // Atomic bit set on byte in memory, without return. [Armv8.1]
  void stsetb(const Register& rs, const MemOperand& src);

  // Atomic bit set on byte in memory, with Store-release semantics and without
  // return. [Armv8.1]
  void stsetlb(const Register& rs, const MemOperand& src);

  // Atomic bit set on halfword in memory, without return. [Armv8.1]
  void stseth(const Register& rs, const MemOperand& src);

  // Atomic bit set on halfword in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsetlh(const Register& rs, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
  void stset(const Register& rs, const MemOperand& src);

  // Atomic bit set on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsetl(const Register& rs, const MemOperand& src);
1915 
  // Atomic signed maximum on byte in memory, without return. [Armv8.1]
  void stsmaxb(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsmaxlb(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
  void stsmaxh(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stsmaxlh(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stsmax(const Register& rs, const MemOperand& src);

  // Atomic signed maximum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsmaxl(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on byte in memory, without return. [Armv8.1]
  void stsminb(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stsminlb(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
  void stsminh(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stsminlh(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stsmin(const Register& rs, const MemOperand& src);

  // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stsminl(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
  void stumaxb(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stumaxlb(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
  void stumaxh(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stumaxlh(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stumax(const Register& rs, const MemOperand& src);

  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stumaxl(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
  void stuminb(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on byte in memory, with Store-release semantics and
  // without return. [Armv8.1]
  void stuminlb(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
  void stuminh(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
  // and without return. [Armv8.1]
  void stuminlh(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, without return.
  // [Armv8.1]
  void stumin(const Register& rs, const MemOperand& src);

  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
  void stuminl(const Register& rs, const MemOperand& src);
2003 
  // Swap byte in memory [Armv8.1]
  void swpb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Load-acquire semantics [Armv8.1]
  void swpab(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Store-release semantics [Armv8.1]
  void swplb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap byte in memory, with Load-acquire and Store-release semantics
  // [Armv8.1]
  void swpalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory [Armv8.1]
  void swph(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
  void swpah(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Store-release semantics [Armv8.1]
  void swplh(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap halfword in memory, with Load-acquire and Store-release semantics
  // [Armv8.1]
  void swpalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory [Armv8.1]
  void swp(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
  void swpa(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
  void swpl(const Register& rs, const Register& rt, const MemOperand& src);

  // Swap word or doubleword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2042 
  // Load-Acquire RCpc Register byte [Armv8.3]
  void ldaprb(const Register& rt, const MemOperand& src);

  // Load-Acquire RCpc Register halfword [Armv8.3]
  void ldaprh(const Register& rt, const MemOperand& src);

  // Load-Acquire RCpc Register word or doubleword [Armv8.3]
  void ldapr(const Register& rt, const MemOperand& src);
2051 
  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Prefetch memory (allowing unallocated hints).
  void prfm(int op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset, allowing unallocated hints).
  void prfum(int op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool (allowing unallocated hints).
  void prfm(int op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
  void prfm(int op, int64_t imm19);
2083 
  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.

  // Move immediate and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
2098 
  // Move inverted immediate.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
2103 
  // Move immediate.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
2108 
2109   // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2110   void mov(const Register& rd, uint64_t imm) {
2111     if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2112       VIXL_UNIMPLEMENTED();
2113     }
2114   }
2115 
  // Misc instructions.

  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Generate exception targeting EL1.
  void svc(int code);

  // Generate undefined instruction exception.
  void udf(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move inverted operand to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.

  // Move to register from system register.
  void mrs(const Register& xt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& xt);

  // Invert carry flag [Armv8.4].
  void cfinv();

  // Convert floating-point condition flags from alternative format to Arm
  // format [Armv8.5].
  void xaflag();

  // Convert floating-point condition flags from Arm format to alternative
  // format [Armv8.5].
  void axflag();

  // System instruction.
  void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);

  // System instruction with pre-encoded op (op1:crn:crm:op2).
  void sys(int op, const Register& xt = xzr);

  // System instruction with result.
  void sysl(int op, const Register& xt = xzr);

  // System data cache operation.
  void dc(DataCacheOp op, const Register& rt);

  // System instruction cache operation.
  void ic(InstructionCacheOp op, const Register& rt);

  // System hint (named type).
  void hint(SystemHint code);

  // System hint (numbered type).
  void hint(int imm7);

  // Clear exclusive monitor.
  void clrex(int imm4 = 0xf);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Error synchronization barrier.
  void esb();

  // Conditional speculation dependency barrier.
  void csdb();

  // No-op.
  void nop() { hint(NOP); }

  // Branch target identification.
  void bti(BranchTargetIdentifier id);
2199 
  // FP and NEON instructions.

  // Move double precision immediate to FP register.
  void fmov(const VRegister& vd, double imm);

  // Move single precision immediate to FP register.
  void fmov(const VRegister& vd, float imm);

  // Move half precision immediate to FP register [Armv8.2].
  void fmov(const VRegister& vd, Float16 imm);

  // Move FP register to register.
  void fmov(const Register& rd, const VRegister& fn);

  // Move register to FP register.
  void fmov(const VRegister& vd, const Register& rn);

  // Move FP register to FP register.
  void fmov(const VRegister& vd, const VRegister& fn);

  // Move 64-bit register to top half of 128-bit FP register.
  void fmov(const VRegister& vd, int index, const Register& rn);

  // Move top half of 128-bit FP register to 64-bit register.
  void fmov(const Register& rd, const VRegister& vn, int index);

  // FP add.
  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP subtract.
  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply.
  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add.
  void fmadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-subtract.
  void fmsub(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-add and negate.
  void fnmadd(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP fused multiply-subtract and negate.
  void fnmsub(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP multiply-negate scalar.
  void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal exponent scalar.
  void frecpx(const VRegister& vd, const VRegister& vn);

  // FP divide.
  void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum.
  void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum.
  void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum number.
  void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum number.
  void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP absolute.
  void fabs(const VRegister& vd, const VRegister& vn);

  // FP negate.
  void fneg(const VRegister& vd, const VRegister& vn);

  // FP square root.
  void fsqrt(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding.
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding.
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
  void frint32x(const VRegister& vd, const VRegister& vn);

  // FP round to 32-bit integer, towards zero [Armv8.5].
  void frint32z(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
  void frint64x(const VRegister& vd, const VRegister& vn);

  // FP round to 64-bit integer, towards zero [Armv8.5].
  void frint64z(const VRegister& vd, const VRegister& vn);
2321 
2322   void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);
2323 
2324   void FPCompareMacro(const VRegister& vn,
2325                       const VRegister& vm,
2326                       FPTrapFlags trap);
2327 
2328   // FP compare registers.
2329   void fcmp(const VRegister& vn, const VRegister& vm);
2330 
2331   // FP compare immediate.
2332   void fcmp(const VRegister& vn, double value);
2333 
2334   void FPCCompareMacro(const VRegister& vn,
2335                        const VRegister& vm,
2336                        StatusFlags nzcv,
2337                        Condition cond,
2338                        FPTrapFlags trap);
2339 
2340   // FP conditional compare.
2341   void fccmp(const VRegister& vn,
2342              const VRegister& vm,
2343              StatusFlags nzcv,
2344              Condition cond);
2345 
2346   // FP signaling compare registers.
2347   void fcmpe(const VRegister& vn, const VRegister& vm);
2348 
2349   // FP signaling compare immediate.
2350   void fcmpe(const VRegister& vn, double value);
2351 
2352   // FP conditional signaling compare.
2353   void fccmpe(const VRegister& vn,
2354               const VRegister& vm,
2355               StatusFlags nzcv,
2356               Condition cond);
2357 
2358   // FP conditional select.
2359   void fcsel(const VRegister& vd,
2360              const VRegister& vn,
2361              const VRegister& vm,
2362              Condition cond);
2363 
2364   // Common FP Convert functions.
2365   void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2366   void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2367   void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2368 
2369   // FP convert between precisions.
2370   void fcvt(const VRegister& vd, const VRegister& vn);
2371 
2372   // FP convert to higher precision.
2373   void fcvtl(const VRegister& vd, const VRegister& vn);
2374 
2375   // FP convert to higher precision (second part).
2376   void fcvtl2(const VRegister& vd, const VRegister& vn);
2377 
2378   // FP convert to lower precision.
2379   void fcvtn(const VRegister& vd, const VRegister& vn);
2380 
2381   // FP convert to lower prevision (second part).
2382   void fcvtn2(const VRegister& vd, const VRegister& vn);
2383 
2384   // FP convert to lower precision, rounding to odd.
2385   void fcvtxn(const VRegister& vd, const VRegister& vn);
2386 
2387   // FP convert to lower precision, rounding to odd (second part).
2388   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2389 
  // FP convert to signed integer, nearest with ties to away.
  void fcvtas(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away.
  void fcvtau(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away (vector form).
  void fcvtas(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, nearest with ties to away (vector form).
  void fcvtau(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity.
  void fcvtms(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity.
  void fcvtmu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards -infinity (vector form).
  void fcvtms(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards -infinity (vector form).
  void fcvtmu(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to even.
  void fcvtns(const Register& rd, const VRegister& vn);
2416 
2417   // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2418   void fjcvtzs(const Register& rd, const VRegister& vn);
2419 
2420   // FP convert to unsigned integer, nearest with ties to even.
2421   void fcvtnu(const Register& rd, const VRegister& vn);
2422 
2423   // FP convert to signed integer, nearest with ties to even.
2424   void fcvtns(const VRegister& rd, const VRegister& vn);
2425 
2426   // FP convert to unsigned integer, nearest with ties to even.
2427   void fcvtnu(const VRegister& rd, const VRegister& vn);
2428 
  // FP convert to signed integer or fixed-point, round towards zero.
  // A non-zero |fbits| selects the fixed-point form with that many
  // fraction bits.
  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero.
  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer or fixed-point, round towards zero
  // (vector form).
  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to unsigned integer or fixed-point, round towards zero
  // (vector form).
  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);

  // FP convert to signed integer, round towards +infinity.
  void fcvtps(const Register& rd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity.
  void fcvtpu(const Register& rd, const VRegister& vn);

  // FP convert to signed integer, round towards +infinity (vector form).
  void fcvtps(const VRegister& vd, const VRegister& vn);

  // FP convert to unsigned integer, round towards +infinity (vector form).
  void fcvtpu(const VRegister& vd, const VRegister& vn);
2452 
2453   // Convert signed integer or fixed point to FP.
2454   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2455 
2456   // Convert unsigned integer or fixed point to FP.
2457   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2458 
2459   // Convert signed integer or fixed-point to FP.
2460   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2461 
2462   // Convert unsigned integer or fixed-point to FP.
2463   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2464 
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving sub.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving sub.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements (scalar form).
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // In the by-element forms below, |vm_index| selects the lane of vm that
  // is used as the scalar operand.

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);
2653 
  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zero-comparison forms. NOTE(review): the trailing int operand appears to
  // always be a zero immediate; confirm against the implementation.

  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise or-not (orn).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise exclusive or (eor).
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate. The immediate may be modified by a shift given by
  // |shift| and |shift_amount|.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);
2761 
  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // In the lane-addressed forms below, |vd_index| and |vn_index| select the
  // destination and source lanes respectively.

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);
2848 
  // NEON structure loads (ld1-ld4), including the all-lanes replicating
  // ld*r forms.

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2926 
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Immediate shift forms follow; |shift| is the shift amount in bits.

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size. NOTE(review): the shift amount appears
  // to be constrained to the source element size; confirm in the
  // implementation.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);
3013 
  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3052 
3053   // One-element structure store from one register.
3054   void st1(const VRegister& vt, const MemOperand& src);
3055 
3056   // One-element structure store from two registers.
3057   void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3058 
3059   // One-element structure store from three registers.
3060   void st1(const VRegister& vt,
3061            const VRegister& vt2,
3062            const VRegister& vt3,
3063            const MemOperand& src);
3064 
  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3159 
  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounding shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3309 
  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed dot product [Armv8.2].
  void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply accumulate returning high
  // half [Armv8.1].
  void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned dot product [Armv8.2].
  void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with unsigned and signed integers (vector).
  void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Dot product with signed and unsigned integers (vector, by element).
  void sudot(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Dot product with unsigned and signed integers (vector, by element).
  void usdot(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed saturating rounding doubling multiply subtract returning high half
  // [Armv8.1].
  void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed dot product by element [Armv8.2].
  void sdot(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // Signed saturating rounding doubling multiply accumulate element returning
  // high half [Armv8.1].
  void sqrdmlah(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned dot product by element [Armv8.2].
  void udot(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // Signed saturating rounding doubling multiply subtract element returning
  // high half [Armv8.1].
  void sqrdmlsh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3460 
  // FP vector multiply accumulate.
  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator.
  void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator (second part).
  void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add long to accumulator by element.
  void fmlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP fused multiply-add long to accumulator by element (second part).
  void fmlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // FP vector multiply subtract.
  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator.
  void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator (second part).
  void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-subtract long to accumulator by element.
  void fmlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP fused multiply-subtract long to accumulator by element (second part).
  void fmlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // FP vector multiply extended.
  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than or equal.
  void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than.
  void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply by element.
  void fmul(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-add to accumulator by element.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-sub from accumulator by element.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP compare equal to zero.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);

  // FP absolute difference.
  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add vector.
  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add scalar.
  void faddp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum vector.
  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum scalar.
  void fmaxp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum vector.
  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum scalar.
  void fminp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum number vector.
  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum number scalar.
  void fmaxnmp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum number vector.
  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum number scalar.
  void fminnmp(const VRegister& vd, const VRegister& vn);
3592 
  // v8.3 complex numbers - note that these are only partial/helper functions
  // and must be used in series in order to perform full CN operations.

  // FP complex multiply accumulate (by element) [Armv8.3].
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index,
             int rot);

  // FP complex multiply accumulate [Armv8.3].
  void fcmla(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);

  // FP complex add [Armv8.3].
  void fcadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int rot);

  // Signed 8-bit integer matrix multiply-accumulate (vector).
  void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
  void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned 8-bit integer matrix multiply-accumulate (vector).
  void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit Clear and exclusive-OR.
  void bcax(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            const VRegister& va);

  // Three-way Exclusive-OR.
  void eor3(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            const VRegister& va);

  // Exclusive-OR and Rotate.
  void xar(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int rotate);

  // Rotate and Exclusive-OR.
  void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA1 hash update (choose).
  void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA1 fixed rotate.
  void sha1h(const VRegister& sd, const VRegister& sn);

  // SHA1 hash update (majority).
  void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA1 hash update (parity).
  void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA1 schedule update 0.
  void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA1 schedule update 1.
  void sha1su1(const VRegister& vd, const VRegister& vn);

  // SHA256 hash update (part 1).
  void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA256 hash update (part 2).
  void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA256 schedule update 0.
  void sha256su0(const VRegister& vd, const VRegister& vn);

  // SHA256 schedule update 1.
  void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA512 hash update part 1.
  void sha512h(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA512 hash update part 2.
  void sha512h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SHA512 schedule update 0.
  void sha512su0(const VRegister& vd, const VRegister& vn);

  // SHA512 schedule update 1.
  void sha512su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // AES single round decryption.
  void aesd(const VRegister& vd, const VRegister& vn);

  // AES single round encryption.
  void aese(const VRegister& vd, const VRegister& vn);

  // AES inverse mix columns.
  void aesimc(const VRegister& vd, const VRegister& vn);

  // AES mix columns.
  void aesmc(const VRegister& vd, const VRegister& vn);

  // SM3PARTW1.
  void sm3partw1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SM3PARTW2.
  void sm3partw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // SM3SS1.
  void sm3ss1(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // SM3TT1A.
  void sm3tt1a(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int index);

  // SM3TT1B.
  void sm3tt1b(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int index);

  // SM3TT2A.
  void sm3tt2a(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int index);

  // SM3TT2B.
  void sm3tt2b(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int index);

  // SM4 Encode.
  void sm4e(const VRegister& vd, const VRegister& vn);

  // SM4 Key.
  void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3740 
3741   // Scalable Vector Extensions.
3742 
3743   // Absolute value (predicated).
3744   void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3745 
3746   // Add vectors (predicated).
3747   void add(const ZRegister& zd,
3748            const PRegisterM& pg,
3749            const ZRegister& zn,
3750            const ZRegister& zm);
3751 
3752   // Add vectors (unpredicated).
3753   void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3754 
3755   // Add immediate (unpredicated).
3756   void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3757 
3758   // Add multiple of predicate register size to scalar register.
3759   void addpl(const Register& xd, const Register& xn, int imm6);
3760 
3761   // Add multiple of vector register size to scalar register.
3762   void addvl(const Register& xd, const Register& xn, int imm6);
3763 
3764   // Compute vector address.
3765   void adr(const ZRegister& zd, const SVEMemOperand& addr);
3766 
3767   // Bitwise AND predicates.
3768   void and_(const PRegisterWithLaneSize& pd,
3769             const PRegisterZ& pg,
3770             const PRegisterWithLaneSize& pn,
3771             const PRegisterWithLaneSize& pm);
3772 
3773   // Bitwise AND vectors (predicated).
3774   void and_(const ZRegister& zd,
3775             const PRegisterM& pg,
3776             const ZRegister& zn,
3777             const ZRegister& zm);
3778 
3779   // Bitwise AND with immediate (unpredicated).
3780   void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3781 
3782   // Bitwise AND vectors (unpredicated).
3783   void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3784 
3785   // Bitwise AND predicates.
3786   void ands(const PRegisterWithLaneSize& pd,
3787             const PRegisterZ& pg,
3788             const PRegisterWithLaneSize& pn,
3789             const PRegisterWithLaneSize& pm);
3790 
3791   // Bitwise AND reduction to scalar.
3792   void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3793 
3794   // Arithmetic shift right by immediate (predicated).
3795   void asr(const ZRegister& zd,
3796            const PRegisterM& pg,
3797            const ZRegister& zn,
3798            int shift);
3799 
3800   // Arithmetic shift right by 64-bit wide elements (predicated).
3801   void asr(const ZRegister& zd,
3802            const PRegisterM& pg,
3803            const ZRegister& zn,
3804            const ZRegister& zm);
3805 
3806   // Arithmetic shift right by immediate (unpredicated).
3807   void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3808 
3809   // Arithmetic shift right by 64-bit wide elements (unpredicated).
3810   void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3811 
3812   // Arithmetic shift right for divide by immediate (predicated).
3813   void asrd(const ZRegister& zd,
3814             const PRegisterM& pg,
3815             const ZRegister& zn,
3816             int shift);
3817 
3818   // Reversed arithmetic shift right by vector (predicated).
3819   void asrr(const ZRegister& zd,
3820             const PRegisterM& pg,
3821             const ZRegister& zn,
3822             const ZRegister& zm);
3823 
3824   // Bitwise clear predicates.
3825   void bic(const PRegisterWithLaneSize& pd,
3826            const PRegisterZ& pg,
3827            const PRegisterWithLaneSize& pn,
3828            const PRegisterWithLaneSize& pm);
3829 
3830   // Bitwise clear vectors (predicated).
3831   void bic(const ZRegister& zd,
3832            const PRegisterM& pg,
3833            const ZRegister& zn,
3834            const ZRegister& zm);
3835 
3836   // Bitwise clear bits using immediate (unpredicated).
3837   void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3838 
3839   // Bitwise clear vectors (unpredicated).
3840   void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3841 
3842   // Bitwise clear predicates.
3843   void bics(const PRegisterWithLaneSize& pd,
3844             const PRegisterZ& pg,
3845             const PRegisterWithLaneSize& pn,
3846             const PRegisterWithLaneSize& pm);
3847 
  // Break after first true condition.
  void brka(const PRegisterWithLaneSize& pd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Break after first true condition, also setting the condition flags
  // (the "S" form of brka).
  void brkas(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);

  // Break before first true condition.
  void brkb(const PRegisterWithLaneSize& pd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Break before first true condition, also setting the condition flags
  // (the "S" form of brkb).
  void brkbs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn);

  // Propagate break to next partition.
  void brkn(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Propagate break to next partition, also setting the condition flags
  // (the "S" form of brkn).
  void brkns(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break after first true condition, propagating from previous partition.
  void brkpa(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break after first true condition, propagating from previous partition,
  // also setting the condition flags (the "S" form of brkpa).
  void brkpas(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);

  // Break before first true condition, propagating from previous partition.
  void brkpb(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const PRegisterWithLaneSize& pn,
             const PRegisterWithLaneSize& pm);

  // Break before first true condition, propagating from previous partition,
  // also setting the condition flags (the "S" form of brkpb).
  void brkpbs(const PRegisterWithLaneSize& pd,
              const PRegisterZ& pg,
              const PRegisterWithLaneSize& pn,
              const PRegisterWithLaneSize& pm);
3903 
  // Conditionally extract element after last to general-purpose register.
  // NOTE(review): per the SVE spec, if no predicate element is active the
  // destination takes the value of the first source operand.
  void clasta(const Register& rd,
              const PRegister& pg,
              const Register& rn,
              const ZRegister& zm);

  // Conditionally extract element after last to SIMD&FP scalar register.
  void clasta(const VRegister& vd,
              const PRegister& pg,
              const VRegister& vn,
              const ZRegister& zm);

  // Conditionally extract element after last to vector register.
  void clasta(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Conditionally extract last element to general-purpose register.
  void clastb(const Register& rd,
              const PRegister& pg,
              const Register& rn,
              const ZRegister& zm);

  // Conditionally extract last element to SIMD&FP scalar register.
  void clastb(const VRegister& vd,
              const PRegister& pg,
              const VRegister& vn,
              const ZRegister& zm);

  // Conditionally extract last element to vector register.
  void clastb(const ZRegister& zd,
              const PRegister& pg,
              const ZRegister& zn,
              const ZRegister& zm);
3939 
  // Count leading sign bits (predicated).
  void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Count leading zero bits (predicated).
  void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Compare vectors, with the specific comparison selected by `cond`;
  // convenience front-end for the cmp<cc> forms declared below.
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);
3951 
  // Compare equal: vector to 64-bit wide elements.
  void cmpeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare equal: vector to signed 5-bit immediate.
  void cmpeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare signed greater than or equal: vector to 64-bit wide elements.
  void cmpge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare signed greater than or equal: vector to signed 5-bit immediate.
  void cmpge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare signed greater than: vector to 64-bit wide elements.
  void cmpgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare signed greater than: vector to signed 5-bit immediate.
  void cmpgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare unsigned higher: vector to 64-bit wide elements.
  void cmphi(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare unsigned higher: vector to unsigned 7-bit immediate.
  void cmphi(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare unsigned higher or same: vector to 64-bit wide elements.
  void cmphs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare unsigned higher or same: vector to unsigned 7-bit immediate.
  void cmphs(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare signed less than or equal: vector to 64-bit wide elements.
  void cmple(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare signed less than or equal: vector to signed 5-bit immediate.
  void cmple(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare unsigned lower: vector to 64-bit wide elements.
  void cmplo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare unsigned lower: vector to unsigned 7-bit immediate.
  void cmplo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare unsigned lower or same: vector to 64-bit wide elements.
  void cmpls(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare unsigned lower or same: vector to unsigned 7-bit immediate.
  void cmpls(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             unsigned imm7);

  // Compare signed less than: vector to 64-bit wide elements.
  void cmplt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare signed less than: vector to signed 5-bit immediate.
  void cmplt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);

  // Compare not equal: vector to 64-bit wide elements.
  void cmpne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Compare not equal: vector to signed 5-bit immediate.
  void cmpne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             int imm5);
4071 
  // Logically invert boolean condition in vector (predicated).
  void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Count non-zero bits (predicated).
  void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Set scalar to multiple of predicate constraint element count
  // (byte elements).
  void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to multiple of predicate constraint element count
  // (doubleword elements).
  void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to multiple of predicate constraint element count
  // (halfword elements).
  void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Set scalar to active predicate element count.
  void cntp(const Register& xd,
            const PRegister& pg,
            const PRegisterWithLaneSize& pn);

  // Set scalar to multiple of predicate constraint element count
  // (word elements).
  void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);

  // Shuffle active elements of vector to the right and fill with zero.
  void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);

  // Copy signed integer immediate to vector elements (predicated).
  // With the default shift of -1, the assembler picks a suitable encoding;
  // an explicit shift must be 0 or 8.
  void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);

  // Copy general-purpose register to vector elements (predicated).
  void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);

  // Copy SIMD&FP scalar register to vector elements (predicated).
  void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);

  // Compare and terminate loop (terminate if equal).
  void ctermeq(const Register& rn, const Register& rm);

  // Compare and terminate loop (terminate if not equal).
  void ctermne(const Register& rn, const Register& rm);
4112 
  // Decrement scalar by multiple of predicate constraint element count
  // (byte elements).
  void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by multiple of predicate constraint element count
  // (doubleword elements).
  void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count
  // (doubleword elements).
  void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by multiple of predicate constraint element count
  // (halfword elements).
  void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count
  // (halfword elements).
  void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement scalar by active predicate element count.
  void decp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Decrement vector by active predicate element count.
  void decp(const ZRegister& zdn, const PRegister& pg);

  // Decrement scalar by multiple of predicate constraint element count
  // (word elements).
  void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Decrement vector by multiple of predicate constraint element count
  // (word elements).
  void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4139 
  // Broadcast general-purpose register to vector elements (unpredicated).
  void dup(const ZRegister& zd, const Register& xn);

  // Broadcast indexed element to vector (unpredicated).
  void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);

  // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
  // assembler will pick an appropriate immediate and left shift that is
  // equivalent to the immediate argument. If an explicit left shift is
  // specified (0 or 8), the immediate must be a signed 8-bit integer.

  // Broadcast signed immediate to vector elements (unpredicated).
  void dup(const ZRegister& zd, int imm8, int shift = -1);

  // Broadcast logical bitmask immediate to vector (unpredicated).
  void dupm(const ZRegister& zd, uint64_t imm);

  // Bitwise exclusive OR with inverted immediate (unpredicated).
  void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise exclusive OR predicates.
  void eor(const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const PRegisterWithLaneSize& pn,
           const PRegisterWithLaneSize& pm);

  // Bitwise exclusive OR vectors (predicated).
  void eor(const ZRegister& zd,
           const PRegisterM& pg,
           const ZRegister& zn,
           const ZRegister& zm);

  // Bitwise exclusive OR with immediate (unpredicated).
  void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);

  // Bitwise exclusive OR vectors (unpredicated).
  void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Bitwise exclusive OR predicates, also setting the condition flags
  // (the "S" form of eor).
  void eors(const PRegisterWithLaneSize& pd,
            const PRegisterZ& pg,
            const PRegisterWithLaneSize& pn,
            const PRegisterWithLaneSize& pm);

  // Bitwise XOR reduction to scalar.
  void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Extract vector from pair of vectors.
  void ext(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           unsigned offset);
4192 
  // Floating-point absolute difference (predicated).
  void fabd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point absolute value (predicated).
  void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point absolute compare vectors (|zn| >= |zm|).
  void facge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point absolute compare vectors (|zn| > |zm|).
  void facgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point add immediate (predicated).
  void fadd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point add vector (predicated).
  void fadd(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point add vector (unpredicated).
  void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point add strictly-ordered reduction, accumulating in scalar.
  void fadda(const VRegister& vd,
             const PRegister& pg,
             const VRegister& vn,
             const ZRegister& zm);

  // Floating-point add recursive reduction to scalar.
  void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point complex add with rotate (predicated).
  // `rot` is the rotation in degrees (90 or 270 for FCADD).
  void fcadd(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm,
             int rot);

  // Floating-point compare vector with zero (equal).
  void fcmeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors (equal).
  void fcmeq(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vector with zero (greater than or equal).
  void fcmge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors (greater than or equal).
  void fcmge(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vector with zero (greater than).
  void fcmgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors (greater than).
  void fcmgt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point complex multiply-add with rotate (predicated).
  // `rot` is the rotation in degrees (0, 90, 180 or 270 for FCMLA).
  void fcmla(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm,
             int rot);

  // Floating-point complex multiply-add by indexed values with rotate.
  void fcmla(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index,
             int rot);

  // Floating-point compare vector with zero (less than or equal).
  void fcmle(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vector with zero (less than).
  void fcmlt(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vector with zero (not equal).
  void fcmne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             double zero);

  // Floating-point compare vectors (not equal).
  void fcmne(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point compare vectors (unordered).
  void fcmuo(const PRegisterWithLaneSize& pd,
             const PRegisterZ& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Copy floating-point immediate to vector elements (predicated).
  void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4327 
4328   // Copy half-precision floating-point immediate to vector elements
4329   // (predicated).
fcpy(const ZRegister & zd,const PRegisterM & pg,Float16 imm)4330   void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
4331     fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
4332   }
4333 
  // Floating-point convert precision (predicated).
  void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point convert to signed integer, rounding toward zero
  // (predicated).
  void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point convert to unsigned integer, rounding toward zero
  // (predicated).
  void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point divide by vector (predicated).
  void fdiv(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point reversed divide by vector (predicated): zd = zm / zn.
  void fdivr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Broadcast floating-point immediate to vector elements.
  void fdup(const ZRegister& zd, double imm);
4359 
4360   // Broadcast half-precision floating-point immediate to vector elements.
fdup(const ZRegister & zd,Float16 imm)4361   void fdup(const ZRegister& zd, Float16 imm) {
4362     fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
4363   }
4364 
  // Floating-point exponential accelerator.
  void fexpa(const ZRegister& zd, const ZRegister& zn);

  // Floating-point fused multiply-add vectors (predicated), writing
  // multiplicand [Zdn = Za + Zdn * Zm].
  void fmad(const ZRegister& zdn,
            const PRegisterM& pg,
            const ZRegister& zm,
            const ZRegister& za);

  // Floating-point maximum with immediate (predicated).
  void fmax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point maximum (predicated).
  void fmax(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point maximum number with immediate (predicated).
  void fmaxnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              double imm);

  // Floating-point maximum number (predicated).
  void fmaxnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point maximum number recursive reduction to scalar.
  void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point maximum recursive reduction to scalar.
  void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point minimum with immediate (predicated).
  void fmin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point minimum (predicated).
  void fmin(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point minimum number with immediate (predicated).
  void fminnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              double imm);

  // Floating-point minimum number (predicated).
  void fminnm(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point minimum number recursive reduction to scalar.
  void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point minimum recursive reduction to scalar.
  void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Floating-point fused multiply-add vectors (predicated), writing addend
  // [Zda = Zda + Zn * Zm].
  void fmla(const ZRegister& zda,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point fused multiply-add by indexed elements
  // (Zda = Zda + Zn * Zm[indexed]).
  void fmla(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);

  // Floating-point fused multiply-subtract vectors (predicated), writing
  // addend [Zda = Zda + -Zn * Zm].
  void fmls(const ZRegister& zda,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point fused multiply-subtract by indexed elements
  // (Zda = Zda + -Zn * Zm[indexed]).
  void fmls(const ZRegister& zda,
            const ZRegister& zn,
            const ZRegister& zm,
            int index);
4462 
  // Move 8-bit floating-point immediate to vector elements (unpredicated).
  void fmov(const ZRegister& zd, double imm);

  // Move 8-bit floating-point immediate to vector elements (predicated).
  void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);

  // Floating-point fused multiply-subtract vectors (predicated), writing
  // multiplicand [Zdn = Za + -Zdn * Zm].
  void fmsb(const ZRegister& zdn,
            const PRegisterM& pg,
            const ZRegister& zm,
            const ZRegister& za);

  // Floating-point multiply by immediate (predicated).
  void fmul(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point multiply vectors (predicated).
  void fmul(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point multiply by indexed elements.
  void fmul(const ZRegister& zd,
            const ZRegister& zn,
            const ZRegister& zm,
            unsigned index);

  // Floating-point multiply vectors (unpredicated).
  void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point multiply-extended vectors (predicated).
  void fmulx(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negate (predicated).
  void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point negated fused multiply-add vectors (predicated), writing
  // multiplicand [Zdn = -Za + -Zdn * Zm].
  void fnmad(const ZRegister& zdn,
             const PRegisterM& pg,
             const ZRegister& zm,
             const ZRegister& za);

  // Floating-point negated fused multiply-add vectors (predicated), writing
  // addend [Zda = -Zda + -Zn * Zm].
  void fnmla(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negated fused multiply-subtract vectors (predicated),
  // writing addend [Zda = -Zda + Zn * Zm].
  void fnmls(const ZRegister& zda,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point negated fused multiply-subtract vectors (predicated),
  // writing multiplicand [Zdn = -Za + Zdn * Zm].
  void fnmsb(const ZRegister& zdn,
             const PRegisterM& pg,
             const ZRegister& zm,
             const ZRegister& za);
4533 
  // Floating-point reciprocal estimate (unpredicated).
  void frecpe(const ZRegister& zd, const ZRegister& zn);

  // Floating-point reciprocal step (unpredicated).
  void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point reciprocal exponent (predicated).
  void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, to nearest with ties away from
  // zero (predicated).
  void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, using the current rounding mode
  // (predicated).
  void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward minus infinity
  // (predicated).
  void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, to nearest with ties to even
  // (predicated).
  void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward plus infinity
  // (predicated).
  void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, exact, using the current rounding
  // mode (predicated).
  void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point round to integral value, toward zero (predicated).
  void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point reciprocal square root estimate (unpredicated).
  void frsqrte(const ZRegister& zd, const ZRegister& zn);

  // Floating-point reciprocal square root step (unpredicated).
  void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4569 
  // Floating-point adjust exponent by vector (predicated).
  void fscale(const ZRegister& zd,
              const PRegisterM& pg,
              const ZRegister& zn,
              const ZRegister& zm);

  // Floating-point square root (predicated).
  void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);

  // Floating-point subtract immediate (predicated).
  void fsub(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            double imm);

  // Floating-point subtract vectors (predicated).
  void fsub(const ZRegister& zd,
            const PRegisterM& pg,
            const ZRegister& zn,
            const ZRegister& zm);

  // Floating-point subtract vectors (unpredicated).
  void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point reversed subtract from immediate (predicated).
  void fsubr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             double imm);

  // Floating-point reversed subtract vectors (predicated).
  void fsubr(const ZRegister& zd,
             const PRegisterM& pg,
             const ZRegister& zn,
             const ZRegister& zm);

  // Floating-point trigonometric multiply-add coefficient.
  void ftmad(const ZRegister& zd,
             const ZRegister& zn,
             const ZRegister& zm,
             int imm3);

  // Floating-point trigonometric starting value.
  void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);

  // Floating-point trigonometric select coefficient.
  void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4617 
  // Increment scalar by multiple of predicate constraint element count
  // (byte elements).
  void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by multiple of predicate constraint element count
  // (doubleword elements).
  void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count
  // (doubleword elements).
  void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by multiple of predicate constraint element count
  // (halfword elements).
  void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count
  // (halfword elements).
  void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment scalar by active predicate element count.
  void incp(const Register& rdn, const PRegisterWithLaneSize& pg);

  // Increment vector by active predicate element count.
  void incp(const ZRegister& zdn, const PRegister& pg);

  // Increment scalar by multiple of predicate constraint element count
  // (word elements).
  void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);

  // Increment vector by multiple of predicate constraint element count
  // (word elements).
  void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);

  // Create index starting from and incremented by immediate.
  void index(const ZRegister& zd, int start, int step);

  // Create index starting from and incremented by general-purpose register.
  void index(const ZRegister& zd, const Register& rn, const Register& rm);

  // Create index starting from general-purpose register and incremented by
  // immediate.
  void index(const ZRegister& zd, const Register& rn, int imm5);

  // Create index starting from immediate and incremented by general-purpose
  // register.
  void index(const ZRegister& zd, int imm5, const Register& rm);

  // Insert general-purpose register in shifted vector.
  void insr(const ZRegister& zdn, const Register& rm);

  // Insert SIMD&FP scalar register in shifted vector.
  void insr(const ZRegister& zdn, const VRegister& vm);

  // Extract element after last to general-purpose register.
  void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);

  // Extract element after last to SIMD&FP scalar register.
  void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);

  // Extract last element to general-purpose register.
  void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);

  // Extract last element to SIMD&FP scalar register.
  void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4676 
4677   // Contiguous/gather load bytes to vector.
4678   void ld1b(const ZRegister& zt,
4679             const PRegisterZ& pg,
4680             const SVEMemOperand& addr);
4681 
4682   // Contiguous/gather load halfwords to vector.
4683   void ld1h(const ZRegister& zt,
4684             const PRegisterZ& pg,
4685             const SVEMemOperand& addr);
4686 
4687   // Contiguous/gather load words to vector.
4688   void ld1w(const ZRegister& zt,
4689             const PRegisterZ& pg,
4690             const SVEMemOperand& addr);
4691 
4692   // Contiguous/gather load doublewords to vector.
4693   void ld1d(const ZRegister& zt,
4694             const PRegisterZ& pg,
4695             const SVEMemOperand& addr);
4696 
4697   // TODO: Merge other loads into the SVEMemOperand versions.
4698 
4699   // Load and broadcast unsigned byte to vector.
4700   void ld1rb(const ZRegister& zt,
4701              const PRegisterZ& pg,
4702              const SVEMemOperand& addr);
4703 
4704   // Load and broadcast unsigned halfword to vector.
4705   void ld1rh(const ZRegister& zt,
4706              const PRegisterZ& pg,
4707              const SVEMemOperand& addr);
4708 
4709   // Load and broadcast unsigned word to vector.
4710   void ld1rw(const ZRegister& zt,
4711              const PRegisterZ& pg,
4712              const SVEMemOperand& addr);
4713 
4714   // Load and broadcast doubleword to vector.
4715   void ld1rd(const ZRegister& zt,
4716              const PRegisterZ& pg,
4717              const SVEMemOperand& addr);
4718 
4719   // Contiguous load and replicate sixteen bytes.
4720   void ld1rqb(const ZRegister& zt,
4721               const PRegisterZ& pg,
4722               const SVEMemOperand& addr);
4723 
4724   // Contiguous load and replicate eight halfwords.
4725   void ld1rqh(const ZRegister& zt,
4726               const PRegisterZ& pg,
4727               const SVEMemOperand& addr);
4728 
4729   // Contiguous load and replicate four words.
4730   void ld1rqw(const ZRegister& zt,
4731               const PRegisterZ& pg,
4732               const SVEMemOperand& addr);
4733 
4734   // Contiguous load and replicate two doublewords.
4735   void ld1rqd(const ZRegister& zt,
4736               const PRegisterZ& pg,
4737               const SVEMemOperand& addr);
4738 
4739   // Contiguous load and replicate thirty-two bytes.
4740   void ld1rob(const ZRegister& zt,
4741               const PRegisterZ& pg,
4742               const SVEMemOperand& addr);
4743 
4744   // Contiguous load and replicate sixteen halfwords.
4745   void ld1roh(const ZRegister& zt,
4746               const PRegisterZ& pg,
4747               const SVEMemOperand& addr);
4748 
4749   // Contiguous load and replicate eight words.
4750   void ld1row(const ZRegister& zt,
4751               const PRegisterZ& pg,
4752               const SVEMemOperand& addr);
4753 
4754   // Contiguous load and replicate four doublewords.
4755   void ld1rod(const ZRegister& zt,
4756               const PRegisterZ& pg,
4757               const SVEMemOperand& addr);
4758 
4759   // Load and broadcast signed byte to vector.
4760   void ld1rsb(const ZRegister& zt,
4761               const PRegisterZ& pg,
4762               const SVEMemOperand& addr);
4763 
4764   // Load and broadcast signed halfword to vector.
4765   void ld1rsh(const ZRegister& zt,
4766               const PRegisterZ& pg,
4767               const SVEMemOperand& addr);
4768 
4769   // Load and broadcast signed word to vector.
4770   void ld1rsw(const ZRegister& zt,
4771               const PRegisterZ& pg,
4772               const SVEMemOperand& addr);
4773 
4774   // Contiguous/gather load signed bytes to vector.
4775   void ld1sb(const ZRegister& zt,
4776              const PRegisterZ& pg,
4777              const SVEMemOperand& addr);
4778 
4779   // Contiguous/gather load signed halfwords to vector.
4780   void ld1sh(const ZRegister& zt,
4781              const PRegisterZ& pg,
4782              const SVEMemOperand& addr);
4783 
4784   // Contiguous/gather load signed words to vector.
4785   void ld1sw(const ZRegister& zt,
4786              const PRegisterZ& pg,
4787              const SVEMemOperand& addr);
4788 
4789   // TODO: Merge other loads into the SVEMemOperand versions.
4790 
4791   // Contiguous load two-byte structures to two vectors.
4792   void ld2b(const ZRegister& zt1,
4793             const ZRegister& zt2,
4794             const PRegisterZ& pg,
4795             const SVEMemOperand& addr);
4796 
4797   // Contiguous load two-halfword structures to two vectors.
4798   void ld2h(const ZRegister& zt1,
4799             const ZRegister& zt2,
4800             const PRegisterZ& pg,
4801             const SVEMemOperand& addr);
4802 
4803   // Contiguous load two-word structures to two vectors.
4804   void ld2w(const ZRegister& zt1,
4805             const ZRegister& zt2,
4806             const PRegisterZ& pg,
4807             const SVEMemOperand& addr);
4808 
4809   // Contiguous load two-doubleword structures to two vectors.
4810   void ld2d(const ZRegister& zt1,
4811             const ZRegister& zt2,
4812             const PRegisterZ& pg,
4813             const SVEMemOperand& addr);
4814 
4815   // Contiguous load three-byte structures to three vectors.
4816   void ld3b(const ZRegister& zt1,
4817             const ZRegister& zt2,
4818             const ZRegister& zt3,
4819             const PRegisterZ& pg,
4820             const SVEMemOperand& addr);
4821 
4822   // Contiguous load three-halfword structures to three vectors.
4823   void ld3h(const ZRegister& zt1,
4824             const ZRegister& zt2,
4825             const ZRegister& zt3,
4826             const PRegisterZ& pg,
4827             const SVEMemOperand& addr);
4828 
4829   // Contiguous load three-word structures to three vectors.
4830   void ld3w(const ZRegister& zt1,
4831             const ZRegister& zt2,
4832             const ZRegister& zt3,
4833             const PRegisterZ& pg,
4834             const SVEMemOperand& addr);
4835 
4836   // Contiguous load three-doubleword structures to three vectors.
4837   void ld3d(const ZRegister& zt1,
4838             const ZRegister& zt2,
4839             const ZRegister& zt3,
4840             const PRegisterZ& pg,
4841             const SVEMemOperand& addr);
4842 
4843   // Contiguous load four-byte structures to four vectors.
4844   void ld4b(const ZRegister& zt1,
4845             const ZRegister& zt2,
4846             const ZRegister& zt3,
4847             const ZRegister& zt4,
4848             const PRegisterZ& pg,
4849             const SVEMemOperand& addr);
4850 
4851   // Contiguous load four-halfword structures to four vectors.
4852   void ld4h(const ZRegister& zt1,
4853             const ZRegister& zt2,
4854             const ZRegister& zt3,
4855             const ZRegister& zt4,
4856             const PRegisterZ& pg,
4857             const SVEMemOperand& addr);
4858 
4859   // Contiguous load four-word structures to four vectors.
4860   void ld4w(const ZRegister& zt1,
4861             const ZRegister& zt2,
4862             const ZRegister& zt3,
4863             const ZRegister& zt4,
4864             const PRegisterZ& pg,
4865             const SVEMemOperand& addr);
4866 
4867   // Contiguous load four-doubleword structures to four vectors.
4868   void ld4d(const ZRegister& zt1,
4869             const ZRegister& zt2,
4870             const ZRegister& zt3,
4871             const ZRegister& zt4,
4872             const PRegisterZ& pg,
4873             const SVEMemOperand& addr);
4874 
4875   // Contiguous load first-fault unsigned bytes to vector.
4876   void ldff1b(const ZRegister& zt,
4877               const PRegisterZ& pg,
4878               const SVEMemOperand& addr);
4879 
4880   // Contiguous load first-fault unsigned halfwords to vector.
4881   void ldff1h(const ZRegister& zt,
4882               const PRegisterZ& pg,
4883               const SVEMemOperand& addr);
4884 
4885   // Contiguous load first-fault unsigned words to vector.
4886   void ldff1w(const ZRegister& zt,
4887               const PRegisterZ& pg,
4888               const SVEMemOperand& addr);
4889 
4890   // Contiguous load first-fault doublewords to vector.
4891   void ldff1d(const ZRegister& zt,
4892               const PRegisterZ& pg,
4893               const SVEMemOperand& addr);
4894 
4895   // Contiguous load first-fault signed bytes to vector.
4896   void ldff1sb(const ZRegister& zt,
4897                const PRegisterZ& pg,
4898                const SVEMemOperand& addr);
4899 
4900   // Contiguous load first-fault signed halfwords to vector.
4901   void ldff1sh(const ZRegister& zt,
4902                const PRegisterZ& pg,
4903                const SVEMemOperand& addr);
4904 
4905   // Contiguous load first-fault signed words to vector.
4906   void ldff1sw(const ZRegister& zt,
4907                const PRegisterZ& pg,
4908                const SVEMemOperand& addr);
4909 
4910   // Gather load first-fault unsigned bytes to vector.
4911   void ldff1b(const ZRegister& zt,
4912               const PRegisterZ& pg,
4913               const Register& xn,
4914               const ZRegister& zm);
4915 
4916   // Gather load first-fault unsigned bytes to vector (immediate index).
4917   void ldff1b(const ZRegister& zt,
4918               const PRegisterZ& pg,
4919               const ZRegister& zn,
4920               int imm5);
4921 
4922   // Gather load first-fault doublewords to vector (vector index).
4923   void ldff1d(const ZRegister& zt,
4924               const PRegisterZ& pg,
4925               const Register& xn,
4926               const ZRegister& zm);
4927 
4928   // Gather load first-fault doublewords to vector (immediate index).
4929   void ldff1d(const ZRegister& zt,
4930               const PRegisterZ& pg,
4931               const ZRegister& zn,
4932               int imm5);
4933 
4934   // Gather load first-fault unsigned halfwords to vector (vector index).
4935   void ldff1h(const ZRegister& zt,
4936               const PRegisterZ& pg,
4937               const Register& xn,
4938               const ZRegister& zm);
4939 
4940   // Gather load first-fault unsigned halfwords to vector (immediate index).
4941   void ldff1h(const ZRegister& zt,
4942               const PRegisterZ& pg,
4943               const ZRegister& zn,
4944               int imm5);
4945 
4946   // Gather load first-fault signed bytes to vector (vector index).
4947   void ldff1sb(const ZRegister& zt,
4948                const PRegisterZ& pg,
4949                const Register& xn,
4950                const ZRegister& zm);
4951 
4952   // Gather load first-fault signed bytes to vector (immediate index).
4953   void ldff1sb(const ZRegister& zt,
4954                const PRegisterZ& pg,
4955                const ZRegister& zn,
4956                int imm5);
4957 
4958   // Gather load first-fault signed halfwords to vector (vector index).
4959   void ldff1sh(const ZRegister& zt,
4960                const PRegisterZ& pg,
4961                const Register& xn,
4962                const ZRegister& zm);
4963 
4964   // Gather load first-fault signed halfwords to vector (immediate index).
4965   void ldff1sh(const ZRegister& zt,
4966                const PRegisterZ& pg,
4967                const ZRegister& zn,
4968                int imm5);
4969 
4970   // Gather load first-fault signed words to vector (vector index).
4971   void ldff1sw(const ZRegister& zt,
4972                const PRegisterZ& pg,
4973                const Register& xn,
4974                const ZRegister& zm);
4975 
4976   // Gather load first-fault signed words to vector (immediate index).
4977   void ldff1sw(const ZRegister& zt,
4978                const PRegisterZ& pg,
4979                const ZRegister& zn,
4980                int imm5);
4981 
4982   // Gather load first-fault unsigned words to vector (vector index).
4983   void ldff1w(const ZRegister& zt,
4984               const PRegisterZ& pg,
4985               const Register& xn,
4986               const ZRegister& zm);
4987 
4988   // Gather load first-fault unsigned words to vector (immediate index).
4989   void ldff1w(const ZRegister& zt,
4990               const PRegisterZ& pg,
4991               const ZRegister& zn,
4992               int imm5);
4993 
4994   // Contiguous load non-fault unsigned bytes to vector (immediate index).
4995   void ldnf1b(const ZRegister& zt,
4996               const PRegisterZ& pg,
4997               const SVEMemOperand& addr);
4998 
4999   // Contiguous load non-fault doublewords to vector (immediate index).
5000   void ldnf1d(const ZRegister& zt,
5001               const PRegisterZ& pg,
5002               const SVEMemOperand& addr);
5003 
5004   // Contiguous load non-fault unsigned halfwords to vector (immediate
5005   // index).
5006   void ldnf1h(const ZRegister& zt,
5007               const PRegisterZ& pg,
5008               const SVEMemOperand& addr);
5009 
5010   // Contiguous load non-fault signed bytes to vector (immediate index).
5011   void ldnf1sb(const ZRegister& zt,
5012                const PRegisterZ& pg,
5013                const SVEMemOperand& addr);
5014 
5015   // Contiguous load non-fault signed halfwords to vector (immediate index).
5016   void ldnf1sh(const ZRegister& zt,
5017                const PRegisterZ& pg,
5018                const SVEMemOperand& addr);
5019 
5020   // Contiguous load non-fault signed words to vector (immediate index).
5021   void ldnf1sw(const ZRegister& zt,
5022                const PRegisterZ& pg,
5023                const SVEMemOperand& addr);
5024 
5025   // Contiguous load non-fault unsigned words to vector (immediate index).
5026   void ldnf1w(const ZRegister& zt,
5027               const PRegisterZ& pg,
5028               const SVEMemOperand& addr);
5029 
5030   // Contiguous load non-temporal bytes to vector.
5031   void ldnt1b(const ZRegister& zt,
5032               const PRegisterZ& pg,
5033               const SVEMemOperand& addr);
5034 
5035   // Contiguous load non-temporal halfwords to vector.
5036   void ldnt1h(const ZRegister& zt,
5037               const PRegisterZ& pg,
5038               const SVEMemOperand& addr);
5039 
5040   // Contiguous load non-temporal words to vector.
5041   void ldnt1w(const ZRegister& zt,
5042               const PRegisterZ& pg,
5043               const SVEMemOperand& addr);
5044 
5045   // Contiguous load non-temporal doublewords to vector.
5046   void ldnt1d(const ZRegister& zt,
5047               const PRegisterZ& pg,
5048               const SVEMemOperand& addr);
5049 
5050   // Load SVE predicate/vector register.
5051   void ldr(const CPURegister& rt, const SVEMemOperand& addr);
5052 
5053   // Logical shift left by immediate (predicated).
5054   void lsl(const ZRegister& zd,
5055            const PRegisterM& pg,
5056            const ZRegister& zn,
5057            int shift);
5058 
5059   // Logical shift left by 64-bit wide elements (predicated).
5060   void lsl(const ZRegister& zd,
5061            const PRegisterM& pg,
5062            const ZRegister& zn,
5063            const ZRegister& zm);
5064 
5065   // Logical shift left by immediate (unpredicated).
5066   void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
5067 
5068   // Logical shift left by 64-bit wide elements (unpredicated).
5069   void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5070 
5071   // Reversed logical shift left by vector (predicated).
5072   void lslr(const ZRegister& zd,
5073             const PRegisterM& pg,
5074             const ZRegister& zn,
5075             const ZRegister& zm);
5076 
5077   // Logical shift right by immediate (predicated).
5078   void lsr(const ZRegister& zd,
5079            const PRegisterM& pg,
5080            const ZRegister& zn,
5081            int shift);
5082 
5083   // Logical shift right by 64-bit wide elements (predicated).
5084   void lsr(const ZRegister& zd,
5085            const PRegisterM& pg,
5086            const ZRegister& zn,
5087            const ZRegister& zm);
5088 
5089   // Logical shift right by immediate (unpredicated).
5090   void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
5091 
5092   // Logical shift right by 64-bit wide elements (unpredicated).
5093   void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5094 
5095   // Reversed logical shift right by vector (predicated).
5096   void lsrr(const ZRegister& zd,
5097             const PRegisterM& pg,
5098             const ZRegister& zn,
5099             const ZRegister& zm);
5100 
5101   // Bitwise invert predicate.
5102   void not_(const PRegisterWithLaneSize& pd,
5103             const PRegisterZ& pg,
5104             const PRegisterWithLaneSize& pn);
5105 
5106   // Bitwise invert predicate, setting the condition flags.
5107   void nots(const PRegisterWithLaneSize& pd,
5108             const PRegisterZ& pg,
5109             const PRegisterWithLaneSize& pn);
5110 
5111   // Multiply-add vectors (predicated), writing multiplicand
5112   // [Zdn = Za + Zdn * Zm].
5113   void mad(const ZRegister& zdn,
5114            const PRegisterM& pg,
5115            const ZRegister& zm,
5116            const ZRegister& za);
5117 
5118   // Multiply-add vectors (predicated), writing addend
5119   // [Zda = Zda + Zn * Zm].
5120   void mla(const ZRegister& zda,
5121            const PRegisterM& pg,
5122            const ZRegister& zn,
5123            const ZRegister& zm);
5124 
5125   // Multiply-subtract vectors (predicated), writing addend
5126   // [Zda = Zda - Zn * Zm].
5127   void mls(const ZRegister& zda,
5128            const PRegisterM& pg,
5129            const ZRegister& zn,
5130            const ZRegister& zm);
5131 
5132   // Move predicates (unpredicated)
5133   void mov(const PRegister& pd, const PRegister& pn);
5134 
5135   // Move predicates (merging)
5136   void mov(const PRegisterWithLaneSize& pd,
5137            const PRegisterM& pg,
5138            const PRegisterWithLaneSize& pn);
5139 
5140   // Move predicates (zeroing)
5141   void mov(const PRegisterWithLaneSize& pd,
5142            const PRegisterZ& pg,
5143            const PRegisterWithLaneSize& pn);
5144 
5145   // Move general-purpose register to vector elements (unpredicated)
5146   void mov(const ZRegister& zd, const Register& xn);
5147 
5148   // Move SIMD&FP scalar register to vector elements (unpredicated)
5149   void mov(const ZRegister& zd, const VRegister& vn);
5150 
5151   // Move vector register (unpredicated)
5152   void mov(const ZRegister& zd, const ZRegister& zn);
5153 
5154   // Move indexed element to vector elements (unpredicated)
5155   void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5156 
5157   // Move general-purpose register to vector elements (predicated)
5158   void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5159 
5160   // Move SIMD&FP scalar register to vector elements (predicated)
5161   void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5162 
5163   // Move vector elements (predicated)
5164   void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5165 
5166   // Move signed integer immediate to vector elements (predicated)
5167   void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5168 
5169   // Move signed immediate to vector elements (unpredicated).
5170   void mov(const ZRegister& zd, int imm8, int shift);
5171 
5172   // Move logical bitmask immediate to vector (unpredicated).
5173   void mov(const ZRegister& zd, uint64_t imm);
5174 
5175   // Move predicate (unpredicated), setting the condition flags
5176   void movs(const PRegister& pd, const PRegister& pn);
5177 
5178   // Move predicates (zeroing), setting the condition flags
5179   void movs(const PRegisterWithLaneSize& pd,
5180             const PRegisterZ& pg,
5181             const PRegisterWithLaneSize& pn);
5182 
5183   // Move prefix (predicated).
5184   void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5185 
5186   // Move prefix (unpredicated).
5187   void movprfx(const ZRegister& zd, const ZRegister& zn);
5188 
5189   // Multiply-subtract vectors (predicated), writing multiplicand
5190   // [Zdn = Za - Zdn * Zm].
5191   void msb(const ZRegister& zdn,
5192            const PRegisterM& pg,
5193            const ZRegister& zm,
5194            const ZRegister& za);
5195 
5196   // Multiply vectors (predicated).
5197   void mul(const ZRegister& zd,
5198            const PRegisterM& pg,
5199            const ZRegister& zn,
5200            const ZRegister& zm);
5201 
5202   // Multiply by immediate (unpredicated).
5203   void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5204 
5205   // Bitwise NAND predicates.
5206   void nand(const PRegisterWithLaneSize& pd,
5207             const PRegisterZ& pg,
5208             const PRegisterWithLaneSize& pn,
5209             const PRegisterWithLaneSize& pm);
5210 
5211   // Bitwise NAND predicates, setting the condition flags.
5212   void nands(const PRegisterWithLaneSize& pd,
5213              const PRegisterZ& pg,
5214              const PRegisterWithLaneSize& pn,
5215              const PRegisterWithLaneSize& pm);
5216 
5217   // Negate (predicated).
5218   void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5219 
5220   // Bitwise NOR predicates.
5221   void nor(const PRegisterWithLaneSize& pd,
5222            const PRegisterZ& pg,
5223            const PRegisterWithLaneSize& pn,
5224            const PRegisterWithLaneSize& pm);
5225 
5226   // Bitwise NOR predicates, setting the condition flags.
5227   void nors(const PRegisterWithLaneSize& pd,
5228             const PRegisterZ& pg,
5229             const PRegisterWithLaneSize& pn,
5230             const PRegisterWithLaneSize& pm);
5231 
5232   // Bitwise invert vector (predicated).
5233   void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5234 
5235   // Bitwise OR inverted predicate.
5236   void orn(const PRegisterWithLaneSize& pd,
5237            const PRegisterZ& pg,
5238            const PRegisterWithLaneSize& pn,
5239            const PRegisterWithLaneSize& pm);
5240 
5241   // Bitwise OR inverted predicate, setting the condition flags.
5242   void orns(const PRegisterWithLaneSize& pd,
5243             const PRegisterZ& pg,
5244             const PRegisterWithLaneSize& pn,
5245             const PRegisterWithLaneSize& pm);
5246 
5247   // Bitwise OR with inverted immediate (unpredicated).
5248   void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5249 
5250   // Bitwise OR predicate.
5251   void orr(const PRegisterWithLaneSize& pd,
5252            const PRegisterZ& pg,
5253            const PRegisterWithLaneSize& pn,
5254            const PRegisterWithLaneSize& pm);
5255 
5256   // Bitwise OR vectors (predicated).
5257   void orr(const ZRegister& zd,
5258            const PRegisterM& pg,
5259            const ZRegister& zn,
5260            const ZRegister& zm);
5261 
5262   // Bitwise OR with immediate (unpredicated).
5263   void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5264 
5265   // Bitwise OR vectors (unpredicated).
5266   void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5267 
5268   // Bitwise OR predicate, setting the condition flags.
5269   void orrs(const PRegisterWithLaneSize& pd,
5270             const PRegisterZ& pg,
5271             const PRegisterWithLaneSize& pn,
5272             const PRegisterWithLaneSize& pm);
5273 
5274   // Bitwise OR reduction to scalar.
5275   void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5276 
5277   // Set all predicate elements to false.
5278   void pfalse(const PRegisterWithLaneSize& pd);
5279 
5280   // Set the first active predicate element to true.
5281   void pfirst(const PRegisterWithLaneSize& pd,
5282               const PRegister& pg,
5283               const PRegisterWithLaneSize& pn);
5284 
5285   // Find next active predicate.
5286   void pnext(const PRegisterWithLaneSize& pd,
5287              const PRegister& pg,
5288              const PRegisterWithLaneSize& pn);
5289 
5290   // Prefetch bytes.
5291   void prfb(PrefetchOperation prfop,
5292             const PRegister& pg,
5293             const SVEMemOperand& addr);
5294 
5295   // Prefetch halfwords.
5296   void prfh(PrefetchOperation prfop,
5297             const PRegister& pg,
5298             const SVEMemOperand& addr);
5299 
5300   // Prefetch words.
5301   void prfw(PrefetchOperation prfop,
5302             const PRegister& pg,
5303             const SVEMemOperand& addr);
5304 
5305   // Prefetch doublewords.
5306   void prfd(PrefetchOperation prfop,
5307             const PRegister& pg,
5308             const SVEMemOperand& addr);
5309 
5310   // Set condition flags for predicate.
5311   void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5312 
5313   // Initialise predicate from named constraint.
5314   void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5315 
5316   // Initialise predicate from named constraint, setting the condition flags.
5317   void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5318 
5319   // Unpack and widen the high half of predicate.
5320   void punpkhi(const PRegisterWithLaneSize& pd,
5321                const PRegisterWithLaneSize& pn);
5322 
5323   // Unpack and widen the low half of predicate.
5324   void punpklo(const PRegisterWithLaneSize& pd,
5325                const PRegisterWithLaneSize& pn);
5326 
5327   // Reverse bits (predicated).
5328   void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5329 
5330   // Read the first-fault register.
5331   void rdffr(const PRegisterWithLaneSize& pd);
5332 
5333   // Return predicate of successfully loaded elements.
5334   void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5335 
5336   // Return predicate of successfully loaded elements, setting the condition
5337   // flags.
5337   void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5338 
5339   // Read multiple of vector register size to scalar register.
5340   void rdvl(const Register& xd, int imm6);
5341 
5342   // Reverse all elements in a predicate.
5343   void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5344 
5345   // Reverse all elements in a vector (unpredicated).
5346   void rev(const ZRegister& zd, const ZRegister& zn);
5347 
5348   // Reverse bytes within each element (predicated).
5349   void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5350 
5351   // Reverse halfwords within each element (predicated).
5352   void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5353 
5354   // Reverse words within doubleword elements (predicated).
5355   void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5356 
5357   // Signed absolute difference (predicated).
5358   void sabd(const ZRegister& zd,
5359             const PRegisterM& pg,
5360             const ZRegister& zn,
5361             const ZRegister& zm);
5362 
5363   // Signed add reduction to scalar.
5364   void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5365 
5366   // Signed integer convert to floating-point (predicated).
5367   void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5368 
5369   // Signed divide (predicated).
5370   void sdiv(const ZRegister& zd,
5371             const PRegisterM& pg,
5372             const ZRegister& zn,
5373             const ZRegister& zm);
5374 
5375   // Signed reversed divide (predicated).
5376   void sdivr(const ZRegister& zd,
5377              const PRegisterM& pg,
5378              const ZRegister& zn,
5379              const ZRegister& zm);
5380 
5381   // Signed dot product by indexed quadtuplet.
5382   void sdot(const ZRegister& zda,
5383             const ZRegister& zn,
5384             const ZRegister& zm,
5385             int index);
5386 
5387   // Signed dot product.
5388   void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5389 
5390   // Conditionally select elements from two predicates.
5391   void sel(const PRegisterWithLaneSize& pd,
5392            const PRegister& pg,
5393            const PRegisterWithLaneSize& pn,
5394            const PRegisterWithLaneSize& pm);
5395 
5396   // Conditionally select elements from two vectors.
5397   void sel(const ZRegister& zd,
5398            const PRegister& pg,
5399            const ZRegister& zn,
5400            const ZRegister& zm);
5401 
5402   // Initialise the first-fault register to all true.
5403   void setffr();
5404 
5405   // Signed maximum vectors (predicated).
5406   void smax(const ZRegister& zd,
5407             const PRegisterM& pg,
5408             const ZRegister& zn,
5409             const ZRegister& zm);
5410 
5411   // Signed maximum with immediate (unpredicated).
5412   void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5413 
5414   // Signed maximum reduction to scalar.
5415   void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5416 
5417   // Signed minimum vectors (predicated).
5418   void smin(const ZRegister& zd,
5419             const PRegisterM& pg,
5420             const ZRegister& zn,
5421             const ZRegister& zm);
5422 
5423   // Signed minimum with immediate (unpredicated).
5424   void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5425 
5426   // Signed minimum reduction to scalar.
5427   void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5428 
5429   // Signed multiply returning high half (predicated).
5430   void smulh(const ZRegister& zd,
5431              const PRegisterM& pg,
5432              const ZRegister& zn,
5433              const ZRegister& zm);
5434 
5435   // Splice two vectors under predicate control.
5436   void splice(const ZRegister& zd,
5437               const PRegister& pg,
5438               const ZRegister& zn,
5439               const ZRegister& zm);
5440 
5441   // Splice two vectors under predicate control (constructive).
5442   void splice_con(const ZRegister& zd,
5443                   const PRegister& pg,
5444                   const ZRegister& zn,
5445                   const ZRegister& zm);
5446 
5447   // Signed saturating add vectors (unpredicated).
5448   void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5449 
5450   // Signed saturating add immediate (unpredicated).
5451   void sqadd(const ZRegister& zd,
5452              const ZRegister& zn,
5453              int imm8,
5454              int shift = -1);
5455 
5456   // Signed saturating decrement scalar by multiple of 8-bit predicate
5457   // constraint element count.
5458   // Defaults match the parallel sqdecd/sqdech/sqdecw overloads, so callers
5459   // may omit the pattern and multiplier (SVE_ALL, x1).
5458   void sqdecb(const Register& xd,
5459               const Register& wn,
5460               int pattern = SVE_ALL,
5461               int multiplier = 1);
5462 
5463   // Signed saturating decrement scalar by multiple of 8-bit predicate
5464   // constraint element count.
5465   void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5466 
5467   // Signed saturating decrement scalar by multiple of 64-bit predicate
5468   // constraint element count.
5469   void sqdecd(const Register& xd,
5470               const Register& wn,
5471               int pattern = SVE_ALL,
5472               int multiplier = 1);
5473 
5474   // Signed saturating decrement scalar by multiple of 64-bit predicate
5475   // constraint element count.
5476   void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5477 
5478   // Signed saturating decrement vector by multiple of 64-bit predicate
5479   // constraint element count.
5480   void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5481 
5482   // Signed saturating decrement scalar by multiple of 16-bit predicate
5483   // constraint element count.
5484   void sqdech(const Register& xd,
5485               const Register& wn,
5486               int pattern = SVE_ALL,
5487               int multiplier = 1);
5488 
5489   // Signed saturating decrement scalar by multiple of 16-bit predicate
5490   // constraint element count.
5491   void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5492 
5493   // Signed saturating decrement vector by multiple of 16-bit predicate
5494   // constraint element count.
5495   void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5496 
5497   // Signed saturating decrement scalar by active predicate element count.
5498   void sqdecp(const Register& xd,
5499               const PRegisterWithLaneSize& pg,
5500               const Register& wn);
5501 
5502   // Signed saturating decrement scalar by active predicate element count.
5503   void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5504 
5505   // Signed saturating decrement vector by active predicate element count.
5506   void sqdecp(const ZRegister& zdn, const PRegister& pg);
5507 
5508   // Signed saturating decrement scalar by multiple of 32-bit predicate
5509   // constraint element count.
5510   void sqdecw(const Register& xd,
5511               const Register& wn,
5512               int pattern = SVE_ALL,
5513               int multiplier = 1);
5514 
5515   // Signed saturating decrement scalar by multiple of 32-bit predicate
5516   // constraint element count.
5517   void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5518 
5519   // Signed saturating decrement vector by multiple of 32-bit predicate
5520   // constraint element count.
5521   void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5522 
5523   // Signed saturating increment scalar by multiple of 8-bit predicate
5524   // constraint element count.
5525   void sqincb(const Register& xd,
5526               const Register& wn,
5527               int pattern = SVE_ALL,
5528               int multiplier = 1);
5529 
5530   // Signed saturating increment scalar by multiple of 8-bit predicate
5531   // constraint element count.
5532   void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5533 
5534   // Signed saturating increment scalar by multiple of 64-bit predicate
5535   // constraint element count.
5536   void sqincd(const Register& xd,
5537               const Register& wn,
5538               int pattern,
5539               int multiplier);
5540 
5541   // Signed saturating increment scalar by multiple of 64-bit predicate
5542   // constraint element count.
5543   void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5544 
5545   // Signed saturating increment vector by multiple of 64-bit predicate
5546   // constraint element count.
5547   void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5548 
5549   // Signed saturating increment scalar by multiple of 16-bit predicate
5550   // constraint element count.
5551   void sqinch(const Register& xd,
5552               const Register& wn,
5553               int pattern = SVE_ALL,
5554               int multiplier = 1);
5555 
5556   // Signed saturating increment scalar by multiple of 16-bit predicate
5557   // constraint element count.
5558   void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5559 
5560   // Signed saturating increment vector by multiple of 16-bit predicate
5561   // constraint element count.
5562   void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5563 
5564   // Signed saturating increment scalar by active predicate element count.
5565   void sqincp(const Register& xd,
5566               const PRegisterWithLaneSize& pg,
5567               const Register& wn);
5568 
5569   // Signed saturating increment scalar by active predicate element count.
5570   void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5571 
5572   // Signed saturating increment vector by active predicate element count.
5573   void sqincp(const ZRegister& zdn, const PRegister& pg);
5574 
5575   // Signed saturating increment scalar by multiple of 32-bit predicate
5576   // constraint element count.
5577   void sqincw(const Register& xd,
5578               const Register& wn,
5579               int pattern = SVE_ALL,
5580               int multiplier = 1);
5581 
5582   // Signed saturating increment scalar by multiple of 32-bit predicate
5583   // constraint element count.
5584   void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5585 
5586   // Signed saturating increment vector by multiple of 32-bit predicate
5587   // constraint element count.
5588   void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5589 
5590   // Signed saturating subtract vectors (unpredicated).
5591   void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5592 
5593   // Signed saturating subtract immediate (unpredicated).
5594   void sqsub(const ZRegister& zd,
5595              const ZRegister& zn,
5596              int imm8,
5597              int shift = -1);
5598 
5599   // Contiguous/scatter store bytes from vector.
5600   void st1b(const ZRegister& zt,
5601             const PRegister& pg,
5602             const SVEMemOperand& addr);
5603 
5604   // Contiguous/scatter store halfwords from vector.
5605   void st1h(const ZRegister& zt,
5606             const PRegister& pg,
5607             const SVEMemOperand& addr);
5608 
5609   // Contiguous/scatter store words from vector.
5610   void st1w(const ZRegister& zt,
5611             const PRegister& pg,
5612             const SVEMemOperand& addr);
5613 
5614   // Contiguous/scatter store doublewords from vector.
5615   void st1d(const ZRegister& zt,
5616             const PRegister& pg,
5617             const SVEMemOperand& addr);
5618 
5619   // Contiguous store two-byte structures from two vectors.
5620   void st2b(const ZRegister& zt1,
5621             const ZRegister& zt2,
5622             const PRegister& pg,
5623             const SVEMemOperand& addr);
5624 
5625   // Contiguous store two-halfword structures from two vectors.
5626   void st2h(const ZRegister& zt1,
5627             const ZRegister& zt2,
5628             const PRegister& pg,
5629             const SVEMemOperand& addr);
5630 
5631   // Contiguous store two-word structures from two vectors.
5632   void st2w(const ZRegister& zt1,
5633             const ZRegister& zt2,
5634             const PRegister& pg,
5635             const SVEMemOperand& addr);
5636 
  // Contiguous store two-doubleword structures from two vectors.
  void st2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);
5642 
5643   // Contiguous store three-byte structures from three vectors.
5644   void st3b(const ZRegister& zt1,
5645             const ZRegister& zt2,
5646             const ZRegister& zt3,
5647             const PRegister& pg,
5648             const SVEMemOperand& addr);
5649 
5650   // Contiguous store three-halfword structures from three vectors.
5651   void st3h(const ZRegister& zt1,
5652             const ZRegister& zt2,
5653             const ZRegister& zt3,
5654             const PRegister& pg,
5655             const SVEMemOperand& addr);
5656 
5657   // Contiguous store three-word structures from three vectors.
5658   void st3w(const ZRegister& zt1,
5659             const ZRegister& zt2,
5660             const ZRegister& zt3,
5661             const PRegister& pg,
5662             const SVEMemOperand& addr);
5663 
5664   // Contiguous store three-doubleword structures from three vectors.
5665   void st3d(const ZRegister& zt1,
5666             const ZRegister& zt2,
5667             const ZRegister& zt3,
5668             const PRegister& pg,
5669             const SVEMemOperand& addr);
5670 
5671   // Contiguous store four-byte structures from four vectors.
5672   void st4b(const ZRegister& zt1,
5673             const ZRegister& zt2,
5674             const ZRegister& zt3,
5675             const ZRegister& zt4,
5676             const PRegister& pg,
5677             const SVEMemOperand& addr);
5678 
5679   // Contiguous store four-halfword structures from four vectors.
5680   void st4h(const ZRegister& zt1,
5681             const ZRegister& zt2,
5682             const ZRegister& zt3,
5683             const ZRegister& zt4,
5684             const PRegister& pg,
5685             const SVEMemOperand& addr);
5686 
5687   // Contiguous store four-word structures from four vectors.
5688   void st4w(const ZRegister& zt1,
5689             const ZRegister& zt2,
5690             const ZRegister& zt3,
5691             const ZRegister& zt4,
5692             const PRegister& pg,
5693             const SVEMemOperand& addr);
5694 
5695   // Contiguous store four-doubleword structures from four vectors.
5696   void st4d(const ZRegister& zt1,
5697             const ZRegister& zt2,
5698             const ZRegister& zt3,
5699             const ZRegister& zt4,
5700             const PRegister& pg,
5701             const SVEMemOperand& addr);
5702 
5703   // Contiguous store non-temporal bytes from vector.
5704   void stnt1b(const ZRegister& zt,
5705               const PRegister& pg,
5706               const SVEMemOperand& addr);
5707 
5708   // Contiguous store non-temporal halfwords from vector.
5709   void stnt1h(const ZRegister& zt,
5710               const PRegister& pg,
5711               const SVEMemOperand& addr);
5712 
5713   // Contiguous store non-temporal words from vector.
5714   void stnt1w(const ZRegister& zt,
5715               const PRegister& pg,
5716               const SVEMemOperand& addr);
5717 
5718   // Contiguous store non-temporal doublewords from vector.
5719   void stnt1d(const ZRegister& zt,
5720               const PRegister& pg,
5721               const SVEMemOperand& addr);
5722 
5723   // Store SVE predicate/vector register.
5724   void str(const CPURegister& rt, const SVEMemOperand& addr);
5725 
5726   // Subtract vectors (predicated).
5727   void sub(const ZRegister& zd,
5728            const PRegisterM& pg,
5729            const ZRegister& zn,
5730            const ZRegister& zm);
5731 
5732   // Subtract vectors (unpredicated).
5733   void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5734 
5735   // Subtract immediate (unpredicated).
5736   void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5737 
5738   // Reversed subtract vectors (predicated).
5739   void subr(const ZRegister& zd,
5740             const PRegisterM& pg,
5741             const ZRegister& zn,
5742             const ZRegister& zm);
5743 
5744   // Reversed subtract from immediate (unpredicated).
5745   void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5746 
5747   // Signed unpack and extend half of vector.
5748   void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5749 
5750   // Signed unpack and extend half of vector.
5751   void sunpklo(const ZRegister& zd, const ZRegister& zn);
5752 
5753   // Signed byte extend (predicated).
5754   void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5755 
5756   // Signed halfword extend (predicated).
5757   void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5758 
5759   // Signed word extend (predicated).
5760   void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5761 
5762   // Programmable table lookup/permute using vector of indices into a
5763   // vector.
5764   void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5765 
5766   // Interleave even or odd elements from two predicates.
5767   void trn1(const PRegisterWithLaneSize& pd,
5768             const PRegisterWithLaneSize& pn,
5769             const PRegisterWithLaneSize& pm);
5770 
5771   // Interleave even or odd elements from two vectors.
5772   void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5773 
5774   // Interleave even or odd elements from two predicates.
5775   void trn2(const PRegisterWithLaneSize& pd,
5776             const PRegisterWithLaneSize& pn,
5777             const PRegisterWithLaneSize& pm);
5778 
5779   // Interleave even or odd elements from two vectors.
5780   void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5781 
5782   // Unsigned absolute difference (predicated).
5783   void uabd(const ZRegister& zd,
5784             const PRegisterM& pg,
5785             const ZRegister& zn,
5786             const ZRegister& zm);
5787 
5788   // Unsigned add reduction to scalar.
5789   void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5790 
5791   // Unsigned integer convert to floating-point (predicated).
5792   void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5793 
5794   // Unsigned divide (predicated).
5795   void udiv(const ZRegister& zd,
5796             const PRegisterM& pg,
5797             const ZRegister& zn,
5798             const ZRegister& zm);
5799 
5800   // Unsigned reversed divide (predicated).
5801   void udivr(const ZRegister& zd,
5802              const PRegisterM& pg,
5803              const ZRegister& zn,
5804              const ZRegister& zm);
5805 
5806   // Unsigned dot product by indexed quadtuplet.
5807   void udot(const ZRegister& zda,
5808             const ZRegister& zn,
5809             const ZRegister& zm,
5810             int index);
5811 
5812   // Unsigned dot product.
5813   void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5814 
5815   // Unsigned maximum vectors (predicated).
5816   void umax(const ZRegister& zd,
5817             const PRegisterM& pg,
5818             const ZRegister& zn,
5819             const ZRegister& zm);
5820 
5821   // Unsigned maximum with immediate (unpredicated).
5822   void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5823 
5824   // Unsigned maximum reduction to scalar.
5825   void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5826 
5827   // Unsigned minimum vectors (predicated).
5828   void umin(const ZRegister& zd,
5829             const PRegisterM& pg,
5830             const ZRegister& zn,
5831             const ZRegister& zm);
5832 
5833   // Unsigned minimum with immediate (unpredicated).
5834   void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5835 
5836   // Unsigned minimum reduction to scalar.
5837   void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5838 
5839   // Unsigned multiply returning high half (predicated).
5840   void umulh(const ZRegister& zd,
5841              const PRegisterM& pg,
5842              const ZRegister& zn,
5843              const ZRegister& zm);
5844 
5845   // Unsigned saturating add vectors (unpredicated).
5846   void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5847 
5848   // Unsigned saturating add immediate (unpredicated).
5849   void uqadd(const ZRegister& zd,
5850              const ZRegister& zn,
5851              int imm8,
5852              int shift = -1);
5853 
5854   // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5855   // constraint element count.
5856   void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5857 
5858   // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5859   // constraint element count.
5860   void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5861 
5862   // Unsigned saturating decrement vector by multiple of 64-bit predicate
5863   // constraint element count.
5864   void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5865 
5866   // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5867   // constraint element count.
5868   void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5869 
5870   // Unsigned saturating decrement vector by multiple of 16-bit predicate
5871   // constraint element count.
5872   void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5873 
5874   // Unsigned saturating decrement scalar by active predicate element count.
5875   void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5876 
5877   // Unsigned saturating decrement vector by active predicate element count.
5878   void uqdecp(const ZRegister& zdn, const PRegister& pg);
5879 
5880   // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5881   // constraint element count.
5882   void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5883 
5884   // Unsigned saturating decrement vector by multiple of 32-bit predicate
5885   // constraint element count.
5886   void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5887 
5888   // Unsigned saturating increment scalar by multiple of 8-bit predicate
5889   // constraint element count.
5890   void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5891 
5892   // Unsigned saturating increment scalar by multiple of 64-bit predicate
5893   // constraint element count.
5894   void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5895 
5896   // Unsigned saturating increment vector by multiple of 64-bit predicate
5897   // constraint element count.
5898   void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5899 
5900   // Unsigned saturating increment scalar by multiple of 16-bit predicate
5901   // constraint element count.
5902   void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5903 
5904   // Unsigned saturating increment vector by multiple of 16-bit predicate
5905   // constraint element count.
5906   void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5907 
5908   // Unsigned saturating increment scalar by active predicate element count.
5909   void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5910 
5911   // Unsigned saturating increment vector by active predicate element count.
5912   void uqincp(const ZRegister& zdn, const PRegister& pg);
5913 
5914   // Unsigned saturating increment scalar by multiple of 32-bit predicate
5915   // constraint element count.
5916   void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5917 
5918   // Unsigned saturating increment vector by multiple of 32-bit predicate
5919   // constraint element count.
5920   void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5921 
5922   // Unsigned saturating subtract vectors (unpredicated).
5923   void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5924 
5925   // Unsigned saturating subtract immediate (unpredicated).
5926   void uqsub(const ZRegister& zd,
5927              const ZRegister& zn,
5928              int imm8,
5929              int shift = -1);
5930 
5931   // Unsigned unpack and extend half of vector.
5932   void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5933 
5934   // Unsigned unpack and extend half of vector.
5935   void uunpklo(const ZRegister& zd, const ZRegister& zn);
5936 
5937   // Unsigned byte extend (predicated).
5938   void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5939 
5940   // Unsigned halfword extend (predicated).
5941   void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5942 
5943   // Unsigned word extend (predicated).
5944   void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5945 
5946   // Concatenate even or odd elements from two predicates.
5947   void uzp1(const PRegisterWithLaneSize& pd,
5948             const PRegisterWithLaneSize& pn,
5949             const PRegisterWithLaneSize& pm);
5950 
5951   // Concatenate even or odd elements from two vectors.
5952   void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5953 
5954   // Concatenate even or odd elements from two predicates.
5955   void uzp2(const PRegisterWithLaneSize& pd,
5956             const PRegisterWithLaneSize& pn,
5957             const PRegisterWithLaneSize& pm);
5958 
5959   // Concatenate even or odd elements from two vectors.
5960   void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5961 
5962   // While incrementing signed scalar less than or equal to scalar.
5963   void whilele(const PRegisterWithLaneSize& pd,
5964                const Register& rn,
5965                const Register& rm);
5966 
5967   // While incrementing unsigned scalar lower than scalar.
5968   void whilelo(const PRegisterWithLaneSize& pd,
5969                const Register& rn,
5970                const Register& rm);
5971 
5972   // While incrementing unsigned scalar lower or same as scalar.
5973   void whilels(const PRegisterWithLaneSize& pd,
5974                const Register& rn,
5975                const Register& rm);
5976 
5977   // While incrementing signed scalar less than scalar.
5978   void whilelt(const PRegisterWithLaneSize& pd,
5979                const Register& rn,
5980                const Register& rm);
5981 
5982   // Write the first-fault register.
5983   void wrffr(const PRegisterWithLaneSize& pn);
5984 
5985   // Interleave elements from two half predicates.
5986   void zip1(const PRegisterWithLaneSize& pd,
5987             const PRegisterWithLaneSize& pn,
5988             const PRegisterWithLaneSize& pm);
5989 
5990   // Interleave elements from two half vectors.
5991   void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5992 
5993   // Interleave elements from two half predicates.
5994   void zip2(const PRegisterWithLaneSize& pd,
5995             const PRegisterWithLaneSize& pn,
5996             const PRegisterWithLaneSize& pm);
5997 
5998   // Interleave elements from two half vectors.
5999   void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6000 
6001   // Add with carry long (bottom).
6002   void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6003 
6004   // Add with carry long (top).
6005   void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6006 
6007   // Add narrow high part (bottom).
6008   void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6009 
6010   // Add narrow high part (top).
6011   void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6012 
6013   // Add pairwise.
6014   void addp(const ZRegister& zd,
6015             const PRegisterM& pg,
6016             const ZRegister& zn,
6017             const ZRegister& zm);
6018 
6019   // Bitwise clear and exclusive OR.
6020   void bcax(const ZRegister& zd,
6021             const ZRegister& zn,
6022             const ZRegister& zm,
6023             const ZRegister& zk);
6024 
6025   // Scatter lower bits into positions selected by bitmask.
6026   void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6027 
6028   // Gather lower bits from positions selected by bitmask.
6029   void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6030 
6031   // Group bits to right or left as selected by bitmask.
6032   void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6033 
6034   // Bitwise select.
6035   void bsl(const ZRegister& zd,
6036            const ZRegister& zn,
6037            const ZRegister& zm,
6038            const ZRegister& zk);
6039 
6040   // Bitwise select with first input inverted.
6041   void bsl1n(const ZRegister& zd,
6042              const ZRegister& zn,
6043              const ZRegister& zm,
6044              const ZRegister& zk);
6045 
6046   // Bitwise select with second input inverted.
6047   void bsl2n(const ZRegister& zd,
6048              const ZRegister& zn,
6049              const ZRegister& zm,
6050              const ZRegister& zk);
6051 
6052   // Complex integer add with rotate.
6053   void cadd(const ZRegister& zd,
6054             const ZRegister& zn,
6055             const ZRegister& zm,
6056             int rot);
6057 
6058   // Complex integer dot product (indexed).
6059   void cdot(const ZRegister& zda,
6060             const ZRegister& zn,
6061             const ZRegister& zm,
6062             int index,
6063             int rot);
6064 
6065   // Complex integer dot product.
6066   void cdot(const ZRegister& zda,
6067             const ZRegister& zn,
6068             const ZRegister& zm,
6069             int rot);
6070 
6071   // Complex integer multiply-add with rotate (indexed).
6072   void cmla(const ZRegister& zda,
6073             const ZRegister& zn,
6074             const ZRegister& zm,
6075             int index,
6076             int rot);
6077 
6078   // Complex integer multiply-add with rotate.
6079   void cmla(const ZRegister& zda,
6080             const ZRegister& zn,
6081             const ZRegister& zm,
6082             int rot);
6083 
6084   // Bitwise exclusive OR of three vectors.
6085   void eor3(const ZRegister& zd,
6086             const ZRegister& zn,
6087             const ZRegister& zm,
6088             const ZRegister& zk);
6089 
6090   // Interleaving exclusive OR (bottom, top).
6091   void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6092 
6093   // Interleaving exclusive OR (top, bottom).
6094   void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6095 
6096   // Floating-point add pairwise.
6097   void faddp(const ZRegister& zd,
6098              const PRegisterM& pg,
6099              const ZRegister& zn,
6100              const ZRegister& zm);
6101 
6102   // Floating-point up convert long (top, predicated).
6103   void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6104 
6105   // Floating-point down convert and narrow (top, predicated).
6106   void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6107 
6108   // Floating-point down convert, rounding to odd (predicated).
6109   void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6110 
6111   // Floating-point down convert, rounding to odd (top, predicated).
6112   void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6113 
6114   // Floating-point base 2 logarithm as integer.
6115   void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6116 
6117   // Floating-point maximum number pairwise.
6118   void fmaxnmp(const ZRegister& zd,
6119                const PRegisterM& pg,
6120                const ZRegister& zn,
6121                const ZRegister& zm);
6122 
6123   // Floating-point maximum pairwise.
6124   void fmaxp(const ZRegister& zd,
6125              const PRegisterM& pg,
6126              const ZRegister& zn,
6127              const ZRegister& zm);
6128 
6129   // Floating-point minimum number pairwise.
6130   void fminnmp(const ZRegister& zd,
6131                const PRegisterM& pg,
6132                const ZRegister& zn,
6133                const ZRegister& zm);
6134 
6135   // Floating-point minimum pairwise.
6136   void fminp(const ZRegister& zd,
6137              const PRegisterM& pg,
6138              const ZRegister& zn,
6139              const ZRegister& zm);
6140 
6141   // Half-precision floating-point multiply-add long to single-precision
6142   // (bottom).
6143   void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6144 
6145   // Half-precision floating-point multiply-add long to single-precision
6146   // (top).
6147   void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6148 
6149   // Half-precision floating-point multiply-subtract long from
6150   // single-precision (bottom).
6151   void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6152 
6153   // Half-precision floating-point multiply-subtract long from
6154   // single-precision (top, indexed).
6155   void fmlslt(const ZRegister& zda,
6156               const ZRegister& zn,
6157               const ZRegister& zm,
6158               int index);
6159 
6160   // Half-precision floating-point multiply-add long to single-precision
6161   // (bottom, indexed).
6162   void fmlalb(const ZRegister& zda,
6163               const ZRegister& zn,
6164               const ZRegister& zm,
6165               int index);
6166 
6167   // Half-precision floating-point multiply-add long to single-precision
6168   // (top, indexed).
6169   void fmlalt(const ZRegister& zda,
6170               const ZRegister& zn,
6171               const ZRegister& zm,
6172               int index);
6173 
6174   // Half-precision floating-point multiply-subtract long from
6175   // single-precision (bottom, indexed).
6176   void fmlslb(const ZRegister& zda,
6177               const ZRegister& zn,
6178               const ZRegister& zm,
6179               int index);
6180 
6181   // Half-precision floating-point multiply-subtract long from
6182   // single-precision (top).
6183   void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6184 
6185   // Count matching elements in vector.
6186   void histcnt(const ZRegister& zd,
6187                const PRegisterZ& pg,
6188                const ZRegister& zn,
6189                const ZRegister& zm);
6190 
6191   // Count matching elements in vector segments.
6192   void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6193 
6194   // Gather load non-temporal signed bytes.
6195   void ldnt1sb(const ZRegister& zt,
6196                const PRegisterZ& pg,
6197                const SVEMemOperand& addr);
6198 
6199   // Gather load non-temporal signed halfwords.
6200   void ldnt1sh(const ZRegister& zt,
6201                const PRegisterZ& pg,
6202                const SVEMemOperand& addr);
6203 
6204   // Gather load non-temporal signed words.
6205   void ldnt1sw(const ZRegister& zt,
6206                const PRegisterZ& pg,
6207                const SVEMemOperand& addr);
6208 
6209   // Detect any matching elements, setting the condition flags.
6210   void match(const PRegisterWithLaneSize& pd,
6211              const PRegisterZ& pg,
6212              const ZRegister& zn,
6213              const ZRegister& zm);
6214 
6215   // Multiply-add to accumulator (indexed).
6216   void mla(const ZRegister& zda,
6217            const ZRegister& zn,
6218            const ZRegister& zm,
6219            int index);
6220 
6221   // Multiply-subtract from accumulator (indexed).
6222   void mls(const ZRegister& zda,
6223            const ZRegister& zn,
6224            const ZRegister& zm,
6225            int index);
6226 
6227   // Multiply (indexed).
6228   void mul(const ZRegister& zd,
6229            const ZRegister& zn,
6230            const ZRegister& zm,
6231            int index);
6232 
6233   // Multiply vectors (unpredicated).
6234   void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6235 
6236   // Bitwise inverted select.
6237   void nbsl(const ZRegister& zd,
6238             const ZRegister& zn,
6239             const ZRegister& zm,
6240             const ZRegister& zk);
6241 
6242   // Detect no matching elements, setting the condition flags.
6243   void nmatch(const PRegisterWithLaneSize& pd,
6244               const PRegisterZ& pg,
6245               const ZRegister& zn,
6246               const ZRegister& zm);
6247 
6248   // Polynomial multiply vectors (unpredicated).
6249   void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6250 
6251   // Polynomial multiply long (bottom).
6252   void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6253 
6254   // Polynomial multiply long (top).
6255   void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6256 
6257   // Rounding add narrow high part (bottom).
6258   void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6259 
6260   // Rounding add narrow high part (top).
6261   void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6262 
6263   // Rounding shift right narrow by immediate (bottom).
6264   void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6265 
6266   // Rounding shift right narrow by immediate (top).
6267   void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6268 
6269   // Rounding subtract narrow high part (bottom).
6270   void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6271 
6272   // Rounding subtract narrow high part (top).
6273   void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6274 
6275   // Signed absolute difference and accumulate.
6276   void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6277 
6278   // Signed absolute difference and accumulate long (bottom).
6279   void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6280 
6281   // Signed absolute difference and accumulate long (top).
6282   void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6283 
6284   // Signed absolute difference long (bottom).
6285   void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6286 
6287   // Signed absolute difference long (top).
6288   void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6289 
6290   // Signed add and accumulate long pairwise.
6291   void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6292 
6293   // Signed add long (bottom).
6294   void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6295 
6296   // Signed add long (bottom + top).
6297   void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6298 
6299   // Signed add long (top).
6300   void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6301 
6302   // Signed add wide (bottom).
6303   void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6304 
6305   // Signed add wide (top).
6306   void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6307 
6308   // Subtract with carry long (bottom).
6309   void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6310 
6311   // Subtract with carry long (top).
6312   void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6313 
6314   // Signed halving addition.
6315   void shadd(const ZRegister& zd,
6316              const PRegisterM& pg,
6317              const ZRegister& zn,
6318              const ZRegister& zm);
6319 
6320   // Shift right narrow by immediate (bottom).
6321   void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6322 
6323   // Shift right narrow by immediate (top).
6324   void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6325 
6326   // Signed halving subtract.
6327   void shsub(const ZRegister& zd,
6328              const PRegisterM& pg,
6329              const ZRegister& zn,
6330              const ZRegister& zm);
6331 
6332   // Signed halving subtract reversed vectors.
6333   void shsubr(const ZRegister& zd,
6334               const PRegisterM& pg,
6335               const ZRegister& zn,
6336               const ZRegister& zm);
6337 
6338   // Shift left and insert (immediate).
6339   void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6340 
6341   // Signed maximum pairwise.
6342   void smaxp(const ZRegister& zd,
6343              const PRegisterM& pg,
6344              const ZRegister& zn,
6345              const ZRegister& zm);
6346 
6347   // Signed minimum pairwise.
6348   void sminp(const ZRegister& zd,
6349              const PRegisterM& pg,
6350              const ZRegister& zn,
6351              const ZRegister& zm);
6352 
6353   // Signed multiply-add long to accumulator (bottom, indexed).
6354   void smlalb(const ZRegister& zda,
6355               const ZRegister& zn,
6356               const ZRegister& zm,
6357               int index);
6358 
6359   // Signed multiply-add long to accumulator (bottom).
6360   void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6361 
6362   // Signed multiply-add long to accumulator (top, indexed).
6363   void smlalt(const ZRegister& zda,
6364               const ZRegister& zn,
6365               const ZRegister& zm,
6366               int index);
6367 
6368   // Signed multiply-add long to accumulator (top).
6369   void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6370 
6371   // Signed multiply-subtract long from accumulator (bottom, indexed).
6372   void smlslb(const ZRegister& zda,
6373               const ZRegister& zn,
6374               const ZRegister& zm,
6375               int index);
6376 
6377   // Signed multiply-subtract long from accumulator (bottom).
6378   void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6379 
6380   // Signed multiply-subtract long from accumulator (top, indexed).
6381   void smlslt(const ZRegister& zda,
6382               const ZRegister& zn,
6383               const ZRegister& zm,
6384               int index);
6385 
6386   // Signed multiply-subtract long from accumulator (top).
6387   void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6388 
6389   // Signed multiply returning high half (unpredicated).
6390   void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6391 
6392   // Signed multiply long (bottom, indexed).
6393   void smullb(const ZRegister& zd,
6394               const ZRegister& zn,
6395               const ZRegister& zm,
6396               int index);
6397 
6398   // Signed multiply long (bottom).
6399   void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6400 
6401   // Signed multiply long (top, indexed).
6402   void smullt(const ZRegister& zd,
6403               const ZRegister& zn,
6404               const ZRegister& zm,
6405               int index);
6406 
6407   // Signed multiply long (top).
6408   void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6409 
6410   // Signed saturating absolute value.
6411   void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6412 
6413   // Signed saturating addition (predicated).
6414   void sqadd(const ZRegister& zd,
6415              const PRegisterM& pg,
6416              const ZRegister& zn,
6417              const ZRegister& zm);
6418 
6419   // Saturating complex integer add with rotate.
6420   void sqcadd(const ZRegister& zd,
6421               const ZRegister& zn,
6422               const ZRegister& zm,
6423               int rot);
6424 
6425   // Signed saturating doubling multiply-add long to accumulator (bottom,
6426   // indexed).
6427   void sqdmlalb(const ZRegister& zda,
6428                 const ZRegister& zn,
6429                 const ZRegister& zm,
6430                 int index);
6431 
6432   // Signed saturating doubling multiply-add long to accumulator (bottom).
6433   void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6434 
6435   // Signed saturating doubling multiply-add long to accumulator (bottom x
6436   // top).
6437   void sqdmlalbt(const ZRegister& zda,
6438                  const ZRegister& zn,
6439                  const ZRegister& zm);
6440 
6441   // Signed saturating doubling multiply-add long to accumulator (top,
6442   // indexed).
6443   void sqdmlalt(const ZRegister& zda,
6444                 const ZRegister& zn,
6445                 const ZRegister& zm,
6446                 int index);
6447 
6448   // Signed saturating doubling multiply-add long to accumulator (top).
6449   void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6450 
6451   // Signed saturating doubling multiply-subtract long from accumulator
6452   // (bottom, indexed).
6453   void sqdmlslb(const ZRegister& zda,
6454                 const ZRegister& zn,
6455                 const ZRegister& zm,
6456                 int index);
6457 
6458   // Signed saturating doubling multiply-subtract long from accumulator
6459   // (bottom).
6460   void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6461 
6462   // Signed saturating doubling multiply-subtract long from accumulator
6463   // (bottom x top).
6464   void sqdmlslbt(const ZRegister& zda,
6465                  const ZRegister& zn,
6466                  const ZRegister& zm);
6467 
6468   // Signed saturating doubling multiply-subtract long from accumulator
6469   // (top, indexed).
6470   void sqdmlslt(const ZRegister& zda,
6471                 const ZRegister& zn,
6472                 const ZRegister& zm,
6473                 int index);
6474 
6475   // Signed saturating doubling multiply-subtract long from accumulator
6476   // (top).
6477   void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6478 
6479   // Signed saturating doubling multiply high (indexed).
6480   void sqdmulh(const ZRegister& zd,
6481                const ZRegister& zn,
6482                const ZRegister& zm,
6483                int index);
6484 
6485   // Signed saturating doubling multiply high (unpredicated).
6486   void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6487 
6488   // Signed saturating doubling multiply long (bottom, indexed).
6489   void sqdmullb(const ZRegister& zd,
6490                 const ZRegister& zn,
6491                 const ZRegister& zm,
6492                 int index);
6493 
6494   // Signed saturating doubling multiply long (bottom).
6495   void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6496 
6497   // Signed saturating doubling multiply long (top, indexed).
6498   void sqdmullt(const ZRegister& zd,
6499                 const ZRegister& zn,
6500                 const ZRegister& zm,
6501                 int index);
6502 
6503   // Signed saturating doubling multiply long (top).
6504   void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6505 
6506   // Signed saturating negate.
6507   void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6508 
6509   // Saturating rounding doubling complex integer multiply-add high with
6510   // rotate (indexed).
6511   void sqrdcmlah(const ZRegister& zda,
6512                  const ZRegister& zn,
6513                  const ZRegister& zm,
6514                  int index,
6515                  int rot);
6516 
6517   // Saturating rounding doubling complex integer multiply-add high with
6518   // rotate.
6519   void sqrdcmlah(const ZRegister& zda,
6520                  const ZRegister& zn,
6521                  const ZRegister& zm,
6522                  int rot);
6523 
6524   // Signed saturating rounding doubling multiply-add high to accumulator
6525   // (indexed).
6526   void sqrdmlah(const ZRegister& zda,
6527                 const ZRegister& zn,
6528                 const ZRegister& zm,
6529                 int index);
6530 
6531   // Signed saturating rounding doubling multiply-add high to accumulator
6532   // (unpredicated).
6533   void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6534 
6535   // Signed saturating rounding doubling multiply-subtract high from
6536   // accumulator (indexed).
6537   void sqrdmlsh(const ZRegister& zda,
6538                 const ZRegister& zn,
6539                 const ZRegister& zm,
6540                 int index);
6541 
6542   // Signed saturating rounding doubling multiply-subtract high from
6543   // accumulator (unpredicated).
6544   void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6545 
6546   // Signed saturating rounding doubling multiply high (indexed).
6547   void sqrdmulh(const ZRegister& zd,
6548                 const ZRegister& zn,
6549                 const ZRegister& zm,
6550                 int index);
6551 
6552   // Signed saturating rounding doubling multiply high (unpredicated).
6553   void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6554 
6555   // Signed saturating rounding shift left by vector (predicated).
6556   void sqrshl(const ZRegister& zd,
6557               const PRegisterM& pg,
6558               const ZRegister& zn,
6559               const ZRegister& zm);
6560 
6561   // Signed saturating rounding shift left reversed vectors (predicated).
6562   void sqrshlr(const ZRegister& zd,
6563                const PRegisterM& pg,
6564                const ZRegister& zn,
6565                const ZRegister& zm);
6566 
6567   // Signed saturating rounding shift right narrow by immediate (bottom).
6568   void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6569 
6570   // Signed saturating rounding shift right narrow by immediate (top).
6571   void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6572 
6573   // Signed saturating rounding shift right unsigned narrow by immediate
6574   // (bottom).
6575   void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6576 
6577   // Signed saturating rounding shift right unsigned narrow by immediate
6578   // (top).
6579   void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6580 
6581   // Signed saturating shift left by immediate.
6582   void sqshl(const ZRegister& zd,
6583              const PRegisterM& pg,
6584              const ZRegister& zn,
6585              int shift);
6586 
6587   // Signed saturating shift left by vector (predicated).
6588   void sqshl(const ZRegister& zd,
6589              const PRegisterM& pg,
6590              const ZRegister& zn,
6591              const ZRegister& zm);
6592 
6593   // Signed saturating shift left reversed vectors (predicated).
6594   void sqshlr(const ZRegister& zd,
6595               const PRegisterM& pg,
6596               const ZRegister& zn,
6597               const ZRegister& zm);
6598 
6599   // Signed saturating shift left unsigned by immediate.
6600   void sqshlu(const ZRegister& zd,
6601               const PRegisterM& pg,
6602               const ZRegister& zn,
6603               int shift);
6604 
6605   // Signed saturating shift right narrow by immediate (bottom).
6606   void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6607 
6608   // Signed saturating shift right narrow by immediate (top).
6609   void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6610 
6611   // Signed saturating shift right unsigned narrow by immediate (bottom).
6612   void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6613 
6614   // Signed saturating shift right unsigned narrow by immediate (top).
6615   void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6616 
6617   // Signed saturating subtraction (predicated).
6618   void sqsub(const ZRegister& zd,
6619              const PRegisterM& pg,
6620              const ZRegister& zn,
6621              const ZRegister& zm);
6622 
6623   // Signed saturating subtraction reversed vectors (predicated).
6624   void sqsubr(const ZRegister& zd,
6625               const PRegisterM& pg,
6626               const ZRegister& zn,
6627               const ZRegister& zm);
6628 
6629   // Signed saturating extract narrow (bottom).
6630   void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6631 
6632   // Signed saturating extract narrow (top).
6633   void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6634 
6635   // Signed saturating unsigned extract narrow (bottom).
6636   void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6637 
6638   // Signed saturating unsigned extract narrow (top).
6639   void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6640 
6641   // Signed rounding halving addition.
6642   void srhadd(const ZRegister& zd,
6643               const PRegisterM& pg,
6644               const ZRegister& zn,
6645               const ZRegister& zm);
6646 
6647   // Shift right and insert (immediate).
6648   void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6649 
6650   // Signed rounding shift left by vector (predicated).
6651   void srshl(const ZRegister& zd,
6652              const PRegisterM& pg,
6653              const ZRegister& zn,
6654              const ZRegister& zm);
6655 
6656   // Signed rounding shift left reversed vectors (predicated).
6657   void srshlr(const ZRegister& zd,
6658               const PRegisterM& pg,
6659               const ZRegister& zn,
6660               const ZRegister& zm);
6661 
6662   // Signed rounding shift right by immediate.
6663   void srshr(const ZRegister& zd,
6664              const PRegisterM& pg,
6665              const ZRegister& zn,
6666              int shift);
6667 
6668   // Signed rounding shift right and accumulate (immediate).
6669   void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6670 
6671   // Signed shift left long by immediate (bottom).
6672   void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6673 
6674   // Signed shift left long by immediate (top).
6675   void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6676 
6677   // Signed shift right and accumulate (immediate).
6678   void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6679 
6680   // Signed subtract long (bottom).
6681   void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6682 
6683   // Signed subtract long (bottom - top).
6684   void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6685 
6686   // Signed subtract long (top).
6687   void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6688 
6689   // Signed subtract long (top - bottom).
6690   void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6691 
6692   // Signed subtract wide (bottom).
6693   void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6694 
6695   // Signed subtract wide (top).
6696   void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6697 
6698   // Subtract narrow high part (bottom).
6699   void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6700 
6701   // Subtract narrow high part (top).
6702   void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6703 
6704   // Signed saturating addition of unsigned value.
6705   void suqadd(const ZRegister& zd,
6706               const PRegisterM& pg,
6707               const ZRegister& zn,
6708               const ZRegister& zm);
6709 
6710   // Programmable table lookup in one or two vector tables (zeroing).
6711   void tbl(const ZRegister& zd,
6712            const ZRegister& zn1,
6713            const ZRegister& zn2,
6714            const ZRegister& zm);
6715 
6716   // Programmable table lookup in single vector table (merging).
6717   void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6718 
6719   // Unsigned absolute difference and accumulate.
6720   void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6721 
6722   // Unsigned absolute difference and accumulate long (bottom).
6723   void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6724 
6725   // Unsigned absolute difference and accumulate long (top).
6726   void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6727 
6728   // Unsigned absolute difference long (bottom).
6729   void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6730 
6731   // Unsigned absolute difference long (top).
6732   void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6733 
6734   // Unsigned add and accumulate long pairwise.
6735   void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6736 
6737   // Unsigned add long (bottom).
6738   void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6739 
6740   // Unsigned add long (top).
6741   void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6742 
6743   // Unsigned add wide (bottom).
6744   void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6745 
6746   // Unsigned add wide (top).
6747   void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6748 
6749   // Unsigned halving addition.
6750   void uhadd(const ZRegister& zd,
6751              const PRegisterM& pg,
6752              const ZRegister& zn,
6753              const ZRegister& zm);
6754 
6755   // Unsigned halving subtract.
6756   void uhsub(const ZRegister& zd,
6757              const PRegisterM& pg,
6758              const ZRegister& zn,
6759              const ZRegister& zm);
6760 
6761   // Unsigned halving subtract reversed vectors.
6762   void uhsubr(const ZRegister& zd,
6763               const PRegisterM& pg,
6764               const ZRegister& zn,
6765               const ZRegister& zm);
6766 
6767   // Unsigned maximum pairwise.
6768   void umaxp(const ZRegister& zd,
6769              const PRegisterM& pg,
6770              const ZRegister& zn,
6771              const ZRegister& zm);
6772 
6773   // Unsigned minimum pairwise.
6774   void uminp(const ZRegister& zd,
6775              const PRegisterM& pg,
6776              const ZRegister& zn,
6777              const ZRegister& zm);
6778 
6779   // Unsigned multiply-add long to accumulator (bottom, indexed).
6780   void umlalb(const ZRegister& zda,
6781               const ZRegister& zn,
6782               const ZRegister& zm,
6783               int index);
6784 
6785   // Unsigned multiply-add long to accumulator (bottom).
6786   void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6787 
6788   // Unsigned multiply-add long to accumulator (top, indexed).
6789   void umlalt(const ZRegister& zda,
6790               const ZRegister& zn,
6791               const ZRegister& zm,
6792               int index);
6793 
6794   // Unsigned multiply-add long to accumulator (top).
6795   void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6796 
6797   // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6798   void umlslb(const ZRegister& zda,
6799               const ZRegister& zn,
6800               const ZRegister& zm,
6801               int index);
6802 
6803   // Unsigned multiply-subtract long from accumulator (bottom).
6804   void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6805 
6806   // Unsigned multiply-subtract long from accumulator (top, indexed).
6807   void umlslt(const ZRegister& zda,
6808               const ZRegister& zn,
6809               const ZRegister& zm,
6810               int index);
6811 
6812   // Unsigned multiply-subtract long from accumulator (top).
6813   void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6814 
6815   // Unsigned multiply returning high half (unpredicated).
6816   void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6817 
6818   // Unsigned multiply long (bottom, indexed).
6819   void umullb(const ZRegister& zd,
6820               const ZRegister& zn,
6821               const ZRegister& zm,
6822               int index);
6823 
6824   // Unsigned multiply long (bottom).
6825   void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6826 
6827   // Unsigned multiply long (top, indexed).
6828   void umullt(const ZRegister& zd,
6829               const ZRegister& zn,
6830               const ZRegister& zm,
6831               int index);
6832 
6833   // Unsigned multiply long (top).
6834   void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6835 
6836   // Unsigned saturating addition (predicated).
6837   void uqadd(const ZRegister& zd,
6838              const PRegisterM& pg,
6839              const ZRegister& zn,
6840              const ZRegister& zm);
6841 
6842   // Unsigned saturating rounding shift left by vector (predicated).
6843   void uqrshl(const ZRegister& zd,
6844               const PRegisterM& pg,
6845               const ZRegister& zn,
6846               const ZRegister& zm);
6847 
6848   // Unsigned saturating rounding shift left reversed vectors (predicated).
6849   void uqrshlr(const ZRegister& zd,
6850                const PRegisterM& pg,
6851                const ZRegister& zn,
6852                const ZRegister& zm);
6853 
6854   // Unsigned saturating rounding shift right narrow by immediate (bottom).
6855   void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6856 
6857   // Unsigned saturating rounding shift right narrow by immediate (top).
6858   void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6859 
6860   // Unsigned saturating shift left by immediate.
6861   void uqshl(const ZRegister& zd,
6862              const PRegisterM& pg,
6863              const ZRegister& zn,
6864              int shift);
6865 
6866   // Unsigned saturating shift left by vector (predicated).
6867   void uqshl(const ZRegister& zd,
6868              const PRegisterM& pg,
6869              const ZRegister& zn,
6870              const ZRegister& zm);
6871 
6872   // Unsigned saturating shift left reversed vectors (predicated).
6873   void uqshlr(const ZRegister& zd,
6874               const PRegisterM& pg,
6875               const ZRegister& zn,
6876               const ZRegister& zm);
6877 
6878   // Unsigned saturating shift right narrow by immediate (bottom).
6879   void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6880 
6881   // Unsigned saturating shift right narrow by immediate (top).
6882   void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6883 
6884   // Unsigned saturating subtraction (predicated).
6885   void uqsub(const ZRegister& zd,
6886              const PRegisterM& pg,
6887              const ZRegister& zn,
6888              const ZRegister& zm);
6889 
6890   // Unsigned saturating subtraction reversed vectors (predicated).
6891   void uqsubr(const ZRegister& zd,
6892               const PRegisterM& pg,
6893               const ZRegister& zn,
6894               const ZRegister& zm);
6895 
6896   // Unsigned saturating extract narrow (bottom).
6897   void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6898 
6899   // Unsigned saturating extract narrow (top).
6900   void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6901 
6902   // Unsigned reciprocal estimate (predicated).
6903   void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6904 
6905   // Unsigned rounding halving addition.
6906   void urhadd(const ZRegister& zd,
6907               const PRegisterM& pg,
6908               const ZRegister& zn,
6909               const ZRegister& zm);
6910 
6911   // Unsigned rounding shift left by vector (predicated).
6912   void urshl(const ZRegister& zd,
6913              const PRegisterM& pg,
6914              const ZRegister& zn,
6915              const ZRegister& zm);
6916 
6917   // Unsigned rounding shift left reversed vectors (predicated).
6918   void urshlr(const ZRegister& zd,
6919               const PRegisterM& pg,
6920               const ZRegister& zn,
6921               const ZRegister& zm);
6922 
6923   // Unsigned rounding shift right by immediate.
6924   void urshr(const ZRegister& zd,
6925              const PRegisterM& pg,
6926              const ZRegister& zn,
6927              int shift);
6928 
6929   // Unsigned reciprocal square root estimate (predicated).
6930   void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6931 
6932   // Unsigned rounding shift right and accumulate (immediate).
6933   void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6934 
6935   // Unsigned shift left long by immediate (bottom).
6936   void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6937 
6938   // Unsigned shift left long by immediate (top).
6939   void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6940 
6941   // Unsigned saturating addition of signed value.
6942   void usqadd(const ZRegister& zd,
6943               const PRegisterM& pg,
6944               const ZRegister& zn,
6945               const ZRegister& zm);
6946 
6947   // Unsigned shift right and accumulate (immediate).
6948   void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6949 
6950   // Unsigned subtract long (bottom).
6951   void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6952 
6953   // Unsigned subtract long (top).
6954   void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6955 
6956   // Unsigned subtract wide (bottom).
6957   void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6958 
6959   // Unsigned subtract wide (top).
6960   void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6961 
6962   // While decrementing signed scalar greater than or equal to scalar.
6963   void whilege(const PRegisterWithLaneSize& pd,
6964                const Register& rn,
6965                const Register& rm);
6966 
6967   // While decrementing signed scalar greater than scalar.
6968   void whilegt(const PRegisterWithLaneSize& pd,
6969                const Register& rn,
6970                const Register& rm);
6971 
6972   // While decrementing unsigned scalar higher than scalar.
  void whilehi(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While decrementing unsigned scalar higher or same as scalar.
  void whilehs(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of read-after-write conflicts.
  void whilerw(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // While free of write-after-read/write conflicts.
  void whilewr(const PRegisterWithLaneSize& pd,
               const Register& rn,
               const Register& rm);

  // Bitwise exclusive OR and rotate right by immediate.
  void xar(const ZRegister& zd,
           const ZRegister& zn,
           const ZRegister& zm,
           int shift);

  // Floating-point matrix multiply-accumulate.
  void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Signed integer matrix multiply-accumulate.
  void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer matrix multiply-accumulate.
  void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned integer matrix multiply-accumulate.
  void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer dot product.
  void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);

  // Unsigned by signed integer indexed dot product.
  void usdot(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index);

  // Signed by unsigned integer indexed dot product.
  void sudot(const ZRegister& zda,
             const ZRegister& zn,
             const ZRegister& zm,
             int index);

  // Memory Tagging Extension (MTE) instructions.

  // Add with Tag.
  void addg(const Register& xd, const Register& xn, int offset, int tag_offset);

  // Tag Mask Insert.
  void gmi(const Register& xd, const Register& xn, const Register& xm);

  // Insert Random Tag.
  void irg(const Register& xd, const Register& xn, const Register& xm = xzr);

  // Load Allocation Tag.
  void ldg(const Register& xt, const MemOperand& addr);

  // Shared emission helper for the store-tag instructions below; `op`
  // selects the specific instruction encoding.
  void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op);

  // Store Allocation Tags.
  void st2g(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag.
  void stg(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag and Pair of registers.
  void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr);

  // Store Allocation Tags, Zeroing.
  void stz2g(const Register& xt, const MemOperand& addr);

  // Store Allocation Tag, Zeroing.
  void stzg(const Register& xt, const MemOperand& addr);

  // Subtract with Tag.
  void subg(const Register& xd, const Register& xn, int offset, int tag_offset);

  // Subtract Pointer.
  void subp(const Register& xd, const Register& xn, const Register& xm);

  // Subtract Pointer, setting Flags.
  void subps(const Register& xd, const Register& xn, const Register& xm);
7062 
7063   // Compare with Tag.
cmpp(const Register & xn,const Register & xm)7064   void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); }
7065 
  // Memory Copy / Memory Set instructions (FEAT_MOPS). Each operation is
  // split into prologue ('p'), main ('m') and epilogue ('e') instructions
  // that are issued as a sequence — see the Arm ARM for details.

  // Memory Copy, epilogue.
  void cpye(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, epilogue, reads and writes non-temporal.
  void cpyen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, epilogue, reads non-temporal.
  void cpyern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, epilogue, writes non-temporal.
  void cpyewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, epilogue.
  void cpyfe(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, epilogue, reads and writes non-temporal.
  void cpyfen(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, epilogue, reads non-temporal.
  void cpyfern(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, epilogue, writes non-temporal.
  void cpyfewn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, main.
  void cpyfm(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, main, reads and writes non-temporal.
  void cpyfmn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, main, reads non-temporal.
  void cpyfmrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, main, writes non-temporal.
  void cpyfmwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, prologue.
  void cpyfp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, prologue, reads and writes non-temporal.
  void cpyfpn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, prologue, reads non-temporal.
  void cpyfprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy Forward-only, prologue, writes non-temporal.
  void cpyfpwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, main.
  void cpym(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, main, reads and writes non-temporal.
  void cpymn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, main, reads non-temporal.
  void cpymrn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, main, writes non-temporal.
  void cpymwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, prologue.
  void cpyp(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, prologue, reads and writes non-temporal.
  void cpypn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, prologue, reads non-temporal.
  void cpyprn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Copy, prologue, writes non-temporal.
  void cpypwn(const Register& rd, const Register& rs, const Register& rn);

  // Memory Set, epilogue.
  void sete(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, epilogue, non-temporal.
  void seten(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, epilogue.
  void setge(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, epilogue, non-temporal.
  void setgen(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, main.
  void setgm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, main, non-temporal.
  void setgmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, prologue.
  void setgp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set with tag setting, prologue, non-temporal.
  void setgpn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, main.
  void setm(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, main, non-temporal.
  void setmn(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, prologue.
  void setp(const Register& rd, const Register& rn, const Register& rs);

  // Memory Set, prologue, non-temporal.
  void setpn(const Register& rd, const Register& rn, const Register& rs);
7173 
  // Scalar integer helpers (CSSC extension — confirm against feature checks
  // in the implementation).

  // Absolute value.
  void abs(const Register& rd, const Register& rn);

  // Count bits (population count).
  void cnt(const Register& rd, const Register& rn);

  // Count Trailing Zeros.
  void ctz(const Register& rd, const Register& rn);

  // Signed Maximum.
  void smax(const Register& rd, const Register& rn, const Operand& op);

  // Signed Minimum.
  void smin(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Maximum.
  void umax(const Register& rd, const Register& rn, const Operand& op);

  // Unsigned Minimum.
  void umin(const Register& rd, const Register& rn, const Operand& op);

  // Check feature status.
  void chkfeat(const Register& rd);

  // Guarded Control Stack Push.
  void gcspushm(const Register& rt);

  // Guarded Control Stack Pop.
  void gcspopm(const Register& rt);

  // Guarded Control Stack Switch Stack 1.
  void gcsss1(const Register& rt);

  // Guarded Control Stack Switch Stack 2.
  void gcsss2(const Register& rt);
7209 
  // Emit generic instructions.

  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  // Requires that direct assembly is currently permitted (AllowAssembler()).
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }
7227 
7228   // Copy a string into the instruction stream, including the terminating NULL
7229   // character. The instruction pointer is then aligned correctly for
7230   // subsequent instructions.
EmitString(const char * string)7231   void EmitString(const char* string) {
7232     VIXL_ASSERT(string != NULL);
7233     VIXL_ASSERT(AllowAssembler());
7234 
7235     GetBuffer()->EmitString(string);
7236     GetBuffer()->Align();
7237   }
7238 
  // Code generation helpers.
  static bool OneInstrMoveImmediateHelper(Assembler* assm,
                                          const Register& dst,
                                          uint64_t imm);

  // Register encoding.
  // Encode the code of `rx` into the instruction bit field [hibit:lobit].
  // The stack pointer's internal code is rejected; use the *SP variants below
  // for fields that accept sp.
  template <int hibit, int lobit>
  static Instr Rx(CPURegister rx) {
    VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
    return ImmUnsignedField<hibit, lobit>(rx.GetCode());
  }

  // Generate one static encoder (Rd, Rn, Rm, ...) per named register field.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES

  // As Rm, but additionally disallow the zero register.
  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

  static Instr RmSP(Register rm) {
    VIXL_ASSERT(!rm.IsZero());
    return (rm.GetCode() & kRegCodeMask) << Rm_offset;
  }

  // SVE predicate register field encodings.
  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }
7309 
7310   // Flags encoding.
Flags(FlagsUpdate S)7311   static Instr Flags(FlagsUpdate S) {
7312     if (S == SetFlags) {
7313       return 1 << FlagsUpdate_offset;
7314     } else if (S == LeaveFlags) {
7315       return 0 << FlagsUpdate_offset;
7316     }
7317     VIXL_UNREACHABLE();
7318     return 0;
7319   }
7320 
  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // Generic immediate encoding.
  // Encode the signed immediate `imm` into the bit field [hibit:lobit].
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediate in satisfactory way.
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }

  // PC-relative address encoding.
  // The 21-bit immediate is split across the instruction's separate high
  // (immhi) and low (immlo) fields.
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }

  // Branch encoding.
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }

  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Bit 5 of bit_pos is encoded in a separate field (b5) from bits [4:0]
    // (b40), so shift bit 5 of bit_pos into the b5 field position.
    unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
    bit5 &= ImmTestBranchBit5_mask;
    bit40 &= ImmTestBranchBit40_mask;
    return bit5 | bit40;
  }

  // Data Processing encoding.
  // Select the sf (operand size) field from the register width.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }
7387 
ImmAddSub(int imm)7388   static Instr ImmAddSub(int imm) {
7389     VIXL_ASSERT(IsImmAddSub(imm));
7390     if (IsUint12(imm)) {  // No shift required.
7391       imm <<= ImmAddSub_offset;
7392     } else {
7393       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7394     }
7395     return imm;
7396   }
7397 
  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    // The rotate amount must fit in log2(lane_size) bits.
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }

  // Encode the dtype field of an SVE load/store from the memory access size
  // (msize) and element size (esize), both given as log2 of the size in
  // bytes.
  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

  // As SVEDtype, but placing the low part of the field at bit 13, for
  // encodings where the two halves of dtype are not adjacent.
  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }

  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }
7453 
ImmR(unsigned immr,unsigned reg_size)7454   static Instr ImmR(unsigned immr, unsigned reg_size) {
7455     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7456                 ((reg_size == kWRegSize) && IsUint5(immr)));
7457     USE(reg_size);
7458     VIXL_ASSERT(IsUint6(immr));
7459     return immr << ImmR_offset;
7460   }
7461 
  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  // Encode the 19-bit offset of a load-literal instruction.
  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    // The N bit may only be set for 64-bit operations.
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  // Extract the four N/Z/C/V bits from `nzcv` and place them in the
  // instruction's nzcv field.
  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // Load/store pair offsets are scaled by the access size; `imm7` must be a
  // multiple of that size.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    const auto access_size_in_bytes = 1U << access_size_in_bytes_log2;
    VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes));
    int64_t scaled_imm7 = imm7 / access_size_in_bytes;
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // PAC load/store offsets are multiples of 8, encoded as the sign bit of
  // the scaled value plus its low nine bits.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);
7629 
7630   // FP register type.
FPType(VRegister fd)7631   static Instr FPType(VRegister fd) {
7632     VIXL_ASSERT(fd.IsScalar());
7633     switch (fd.GetSizeInBits()) {
7634       case 16:
7635         return FP16;
7636       case 32:
7637         return FP32;
7638       case 64:
7639         return FP64;
7640       default:
7641         VIXL_UNREACHABLE();
7642         return 0;
7643     }
7644   }
7645 
  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }

  // Immediate field checking helpers.
  static bool IsImmAddSub(int64_t immediate);
  static bool IsImmConditionalCompare(int64_t immediate);
  static bool IsImmFP16(Float16 imm);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); }

  static bool IsImmFP32(uint32_t bits);

  // Convenience overload: checks the raw bit pattern of `imm`.
  static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); }

  static bool IsImmFP64(uint64_t bits);
  static bool IsImmLogical(uint64_t value,
                           unsigned width,
                           unsigned* n = NULL,
                           unsigned* imm_s = NULL,
                           unsigned* imm_r = NULL);
  static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7673 
  // Instruction bits for vector format in data processing operations.
  // Returns 0xffffffff for register shapes with no corresponding format.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return NEON_1D;
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for vector format in floating point data processing
  // operations. The format is selected from the lane count and the total
  // register width.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }
7756 
  // Instruction bits for vector format in load and store operations.
  // Returns 0xffffffff for register shapes with no corresponding format.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
7788 
7789   // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)7790   static Instr SFormat(VRegister vd) {
7791     VIXL_ASSERT(vd.GetLanes() == 1);
7792     switch (vd.GetSizeInBytes()) {
7793       case 1:
7794         return NEON_B;
7795       case 2:
7796         return NEON_H;
7797       case 4:
7798         return NEON_S;
7799       case 8:
7800         return NEON_D;
7801       default:
7802         return 0xffffffff;
7803     }
7804   }
7805 
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    // Map the lane size onto the SVE size field; 0xffffffff marks an
    // unsupported lane size.
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }

  // Pack an element index into the H, L and M bits, using the `num_bits`
  // lowest bits of `index` (unused bits are zero).
  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
7850 
ImmRotFcadd(int rot)7851   static Instr ImmRotFcadd(int rot) {
7852     VIXL_ASSERT(rot == 90 || rot == 270);
7853     return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
7854   }
7855 
ImmRotFcmlaSca(int rot)7856   static Instr ImmRotFcmlaSca(int rot) {
7857     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7858     return (rot / 90) << ImmRotFcmlaSca_offset;
7859   }
7860 
ImmRotFcmlaVec(int rot)7861   static Instr ImmRotFcmlaVec(int rot) {
7862     VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
7863     return (rot / 90) << ImmRotFcmlaVec_offset;
7864   }
7865 
ImmNEONExt(int imm4)7866   static Instr ImmNEONExt(int imm4) {
7867     VIXL_ASSERT(IsUint4(imm4));
7868     return imm4 << ImmNEONExt_offset;
7869   }
7870 
ImmNEON5(Instr format,int index)7871   static Instr ImmNEON5(Instr format, int index) {
7872     VIXL_ASSERT(IsUint4(index));
7873     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7874     int imm5 = (index << (s + 1)) | (1 << s);
7875     return imm5 << ImmNEON5_offset;
7876   }
7877 
ImmNEON4(Instr format,int index)7878   static Instr ImmNEON4(Instr format, int index) {
7879     VIXL_ASSERT(IsUint4(index));
7880     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
7881     int imm4 = index << s;
7882     return imm4 << ImmNEON4_offset;
7883   }
7884 
ImmNEONabcdefgh(int imm8)7885   static Instr ImmNEONabcdefgh(int imm8) {
7886     VIXL_ASSERT(IsUint8(imm8));
7887     Instr instr;
7888     instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
7889     instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
7890     return instr;
7891   }
7892 
  // Encode the 4-bit cmode field for NEON modified-immediate instructions.
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }
7897 
  // Encode the single-bit op field for NEON modified-immediate instructions.
  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }
7902 
  // Size of the code generated since label to the current position.
  // The label must already be bound.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  // Deprecated: use GetSizeOfCodeGeneratedSince instead.
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
7912 
  // Deprecated: query the buffer directly via GetBuffer().GetCapacity().
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  // Deprecated: query the buffer directly via GetBuffer().GetRemainingBytes().
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
7929 
  // Return the configured position-independent-code option.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  // Deprecated: use GetPic instead.
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }
7934 
  // Return a pointer to the assembler's (mutable) CPU feature set.
  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  // Replace the assembler's CPU feature set with `cpu_features`.
  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }
7940 
AllowPageOffsetDependentCode()7941   bool AllowPageOffsetDependentCode() const {
7942     return (GetPic() == PageOffsetDependentCode) ||
7943            (GetPic() == PositionDependentCode);
7944   }
7945 
AppropriateZeroRegFor(const CPURegister & reg)7946   static Register AppropriateZeroRegFor(const CPURegister& reg) {
7947     return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7948   }
7949 
 protected:
  // Emit a load or store of `rt` with any addressing mode supported by `addr`.
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  // Emit a pointer-authenticated load or store.
  void LoadStorePAC(const Register& xt,
                    const MemOperand& addr,
                    LoadStorePACOp op);

  // Emit a load or store of the register pair `rt`, `rt2`.
  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  // NEON structured load/store helpers (multi-structure, single-structure,
  // and all-lanes forms), plus a debug-time operand check.
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  // Encode an SVE memory operand. `is_load` defaults to false; it is only
  // used in the scalar-plus-vector form.
  Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
                            int num_regs,
                            const SVEMemOperand& addr,
                            bool is_load = false);
7987 
  // E.g. st1b, st1h, ...
  // This supports both contiguous and scatter stores.
  void SVESt1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegister& pg,
                    const SVEMemOperand& addr);

  // E.g. ld1b, ld1h, ...
  // This supports both contiguous and gather loads.
  void SVELd1Helper(unsigned msize_in_bytes_log2,
                    const ZRegister& zt,
                    const PRegisterZ& pg,
                    const SVEMemOperand& addr,
                    bool is_signed);

  // E.g. ld1rb, ld1rh, ...
  void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
                             const ZRegister& zt,
                             const PRegisterZ& pg,
                             const SVEMemOperand& addr,
                             bool is_signed);

  // E.g. ldff1b, ldff1h, ... (first-faulting loads).
  // This supports both contiguous and gather loads.
  void SVELdff1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr,
                      bool is_signed);

  // Common code for the helpers above.
  void SVELdSt1Helper(unsigned msize_in_bytes_log2,
                      const ZRegister& zt,
                      const PRegister& pg,
                      const SVEMemOperand& addr,
                      bool is_signed,
                      Instr op);

  // Common code for the helpers above.
  void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
                              const ZRegister& zt,
                              const PRegister& pg,
                              const SVEMemOperand& addr,
                              bool is_load,
                              bool is_signed,
                              bool is_first_fault);

  // E.g. st2b, st3h, ...
  void SVESt234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegister& pg,
                      const SVEMemOperand& addr);

  // E.g. ld2b, ld3h, ...
  void SVELd234Helper(int num_regs,
                      const ZRegister& zt1,
                      const PRegisterZ& pg,
                      const SVEMemOperand& addr);

  // Common code for the helpers above.
  void SVELdSt234Helper(int num_regs,
                        const ZRegister& zt1,
                        const PRegister& pg,
                        const SVEMemOperand& addr,
                        Instr op);

  // E.g. ld1qb, ld1qh, ldnt1b, ...
  void SVELd1St1ScaImmHelper(const ZRegister& zt,
                             const PRegister& pg,
                             const SVEMemOperand& addr,
                             Instr regoffset_op,
                             Instr immoffset_op,
                             int imm_divisor = 1);

  // Vector-plus-scalar load/store helpers.
  void SVELd1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize,
                          bool is_signed);
  void SVESt1VecScaHelper(const ZRegister& zt,
                          const PRegister& pg,
                          const SVEMemOperand& addr,
                          uint32_t msize);
8071 
  // Emit a prefetch with any addressing mode supported by `addr`. The int
  // overload accepts an unnamed (raw) prefetch operation encoding.
  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);
  void Prefetch(int op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
  // reports a bogus uninitialised warning then.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);

  // Bitwise-logical operation with an immediate, on Z registers.
  void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);

  // Logical operation with a pre-encoded immediate (n:imm_s:imm_r form).
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  // SVE integer compare: vector-vector, signed-immediate and
  // unsigned-immediate forms.
  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      const ZRegister& zm,
                      SVEIntCompareVectorsOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      int imm,
                      SVEIntCompareSignedImmOp op);

  void CompareVectors(const PRegisterWithLaneSize& pd,
                      const PRegisterZ& pg,
                      const ZRegister& zn,
                      unsigned imm,
                      SVEIntCompareUnsignedImmOp op);

  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // Encode SVE shift immediates for the left- and right-shift forms.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Select between the H/S/D-lane-size opcodes and encode the indexed
  // multiplicand `zm`.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);

  // SVE prefetch helpers for the various addressing modes.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
8185 
SVEImmPrefetchOperation(PrefetchOperation prfop)8186   static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
8187     // SVE only supports PLD and PST, not PLI.
8188     VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
8189                 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
8190     // Check that we can simply map bits.
8191     VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
8192     VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
8193     // Remaining operations map directly.
8194     return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
8195   }
8196 
  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Convenience pass-through for CPU feature checks.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  bool CPUHas(SystemRegister sysreg) const;
8253 
 private:
  // Encode FP immediates into their 8-bit instruction representations.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON emission helpers, grouped by encoding class.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_. The assembler must be in a state that
  // allows direct emission (see AllowAssembler()).
  void Emit(Instr instruction) {
    // Every A64 instruction is exactly one 32-bit word.
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }
8429 
  // Position-independent-code option; queried via GetPic().
  PositionIndependentCodeOption pic_;

  // CPU features available to the assembler; queried via CPUHas().
  CPUFeatures cpu_features_;
8433 };
8434 
8435 
8436 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8437 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8438   return UpdateValue(new_value,
8439                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8440 }
8441 
8442 
8443 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8444 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8445   return UpdateValue(high64,
8446                      low64,
8447                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8448 }
8449 
8450 
8451 }  // namespace aarch64
8452 
8453 // Required InvalSet template specialisations.
8454 // TODO: These template specialisations should not live in this file.  Move
8455 // Label out of the aarch64 namespace in order to share its implementation
8456 // later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
// A link offset is its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
#undef INVAL_SET_TEMPLATE_PARAMETERS
8472 
8473 }  // namespace vixl
8474 
8475 #endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8476