• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
28 #define VIXL_AARCH64_ASSEMBLER_AARCH64_H_
29 
30 #include "../assembler-base-vixl.h"
31 #include "../code-generation-scopes-vixl.h"
32 #include "../cpu-features.h"
33 #include "../globals-vixl.h"
34 #include "../invalset-vixl.h"
35 #include "../utils-vixl.h"
36 #include "operands-aarch64.h"
37 
38 namespace vixl {
39 namespace aarch64 {
40 
41 class LabelTestHelper;  // Forward declaration.
42 
43 
// A position in the generated code stream. A Label is either unbound, bound
// to a buffer offset, or linked from branch instructions that were emitted
// before the label's location was known.
class Label {
 public:
#ifndef PANDA_BUILD
  Label() : location_(kLocationUnbound) {}
#else
  // PANDA builds thread an explicit allocator through the links container, so
  // a default-constructed Label is disallowed.
  Label() = delete;
  Label(AllocatorWrapper allocator) : links_(allocator), location_(kLocationUnbound) {}
#endif
  virtual ~Label() {
    // All links to a label must have been resolved before it is destructed.
#ifndef PANDA_BUILD
    VIXL_ASSERT(!IsLinked());
#else
    // Codegen may create unlinked labels
#endif
  }

  // A label is bound once it has been given a location in the buffer.
  bool IsBound() const { return location_ >= 0; }
  // A label is linked while instructions referring to it are still unresolved.
  bool IsLinked() const { return !links_.empty(); }

  // Offset of the label from the start of the buffer. Only meaningful once
  // the label is bound (otherwise it is kLocationUnbound).
  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  // Tuning parameters for the InvalSet that stores link offsets.
  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor>
      LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
#ifndef PANDA_BUILD
    LinksSet() : LinksSetBase() {}
#else
    LinksSet() = delete;
    LinksSet(AllocatorWrapper allocator) : LinksSetBase(allocator) {}
#endif
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  // Give the label a location in the buffer.
  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
#ifndef PANDA_BUILD
    VIXL_ASSERT(!IsBound());
#else
    // Disabled for unit-tests (it bind non-bound locs)
#endif
    location_ = location;
  }

  // Record the offset of an instruction that refers to this (unbound) label.
  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, ie. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
177 
178 class Assembler;
179 class LiteralPool;
180 
181 // A literal is a 32-bit or 64-bit piece of data stored in the instruction
182 // stream and loaded through a pc relative load. The same literal can be
183 // referred to by multiple instructions but a literal can only reside at one
184 // place in memory. A literal can be used by a load before or after being
185 // placed in memory.
186 //
187 // Internally an offset of 0 is associated with a literal which has been
188 // neither used nor placed. Then two possibilities arise:
189 //  1) the label is placed, the offset (stored as offset + 1) is used to
190 //     resolve any subsequent load using the label.
191 //  2) the label is not placed and offset is the offset of the last load using
192 //     the literal (stored as -offset -1). If multiple loads refer to this
193 //     literal then the last load holds the offset of the preceding load and
194 //     all loads form a chain. Once the offset is placed all the loads in the
195 //     chain are resolved and future loads fall back to possibility 1.
class RawLiteral {
 public:
  // Controls who is responsible for deleting the literal object.
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  // Size of the literal in bytes: W/S (4), X/D (8) or Q (16).
  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  // See the class comment: offset_ < 0 encodes "used but not placed",
  // offset_ > 0 encodes "placed at offset_ - 1", offset_ == 0 means neither.
  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  // Offset of the literal in the code buffer. Only valid once placed.
  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  // Record the placement offset, biased by +1 so that 0 remains available to
  // mean "neither used nor placed" (see the class comment).
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  // Offset of the most recent unresolved load of this literal; the loads form
  // a chain, as described in the class comment.
  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  // Record a use, stored as -offset - 1 (see the class comment).
  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  // Biased offset encoding; see IsUsed()/IsPlaced() and the class comment.
  ptrdiff_t offset_;
  // Raw value; 32- and 64-bit literals use low64_ only (high64_ stays 0).
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};
306 
307 
// Typed literal. T determines the encoded size: sizeof(T) bytes for the
// single-value constructor, or 16 bytes (Q) for the two-value constructor.
template <typename T>
class Literal : public RawLiteral {
 public:
  // 32- or 64-bit literal, depending on sizeof(T).
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  // 128-bit literal built from two 64-bit halves.
  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // 128-bit variant of UpdateValue().
  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  // Out-of-line overloads; presumably they obtain the code buffer from
  // `assembler` — see the definitions. (TODO confirm against the .cc.)
  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  // Patch the already-placed value directly into the code buffer.
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
380 
381 
// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
400 
401 
// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
418 
419 
420 // Assembler.
421 class Assembler : public vixl::internal::AssemblerBase {
422  public:
  // Build an Assembler with an internally managed buffer, defaulting to
  // position-independent code generation and the legacy baseline CPU features.
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic), cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
426 
#ifdef PANDA_BUILD
  // Disabled under PANDA_BUILD — presumably buffer allocation must go through
  // an external allocator in that configuration (TODO confirm).
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode) = delete;
#else
  // Build an Assembler whose buffer is pre-allocated to `capacity` bytes.
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
#endif
  // Build an Assembler over an externally owned buffer of `capacity` bytes
  // starting at `buffer`.
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity),
        pic_(pic),
        cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {}
445 
  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  // NOTE(review): the checks described above are not visible in this header;
  // they presumably live in AssemblerBase — confirm.
  ~Assembler() {}
451 
452   // System functions.
453 
454   // Start generating code from the beginning of the buffer, discarding any code
455   // and data that has already been emitted into the buffer.
456   void Reset();
457 
458   // Bind a label to the current PC.
459   void bind(Label* label);
460 
461   // Bind a label to a specified offset from the start of the buffer.
462   void BindToOffset(Label* label, ptrdiff_t offset);
463 
464   // Place a literal at the current PC.
465   void place(RawLiteral* literal);
466 
  // Deprecated accessor for the current emission offset; use
  // GetCursorOffset() instead.
  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  // Deprecated accessors for the buffer capacity expressed as an end offset;
  // use GetBuffer().GetCapacity() instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }
479 
  // Return the address of a bound label, cast to T. T must be wide enough to
  // hold a pointer (enforced by the static assert below).
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }
487 
GetInstructionAt(ptrdiff_t instruction_offset)488   Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
489     return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
490   }
491   VIXL_DEPRECATED("GetInstructionAt",
492                   Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
493     return GetInstructionAt(instruction_offset);
494   }
495 
GetInstructionOffset(Instruction * instruction)496   ptrdiff_t GetInstructionOffset(Instruction* instruction) {
497     VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
498     ptrdiff_t offset =
499         instruction - GetBuffer()->GetStartAddress<Instruction*>();
500     VIXL_ASSERT((0 <= offset) &&
501                 (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
502     return offset;
503   }
504   VIXL_DEPRECATED("GetInstructionOffset",
505                   ptrdiff_t InstructionOffset(Instruction* instruction)) {
506     return GetInstructionOffset(instruction);
507   }
508 
509   // Instruction set functions.
510 
511   // Branch / Jump instructions.
512 
513   // Branch to register.
514   void br(const Register& xn);
515 
516   // Branch with link to register.
517   void blr(const Register& xn);
518 
519   // Branch to register with return hint.
520   void ret(const Register& xn = lr);
521 
522   // Branch to register, with pointer authentication. Using key A and a modifier
523   // of zero [Armv8.3].
524   void braaz(const Register& xn);
525 
526   // Branch to register, with pointer authentication. Using key B and a modifier
527   // of zero [Armv8.3].
528   void brabz(const Register& xn);
529 
530   // Branch with link to register, with pointer authentication. Using key A and
531   // a modifier of zero [Armv8.3].
532   void blraaz(const Register& xn);
533 
534   // Branch with link to register, with pointer authentication. Using key B and
535   // a modifier of zero [Armv8.3].
536   void blrabz(const Register& xn);
537 
538   // Return from subroutine, with pointer authentication. Using key A [Armv8.3].
539   void retaa();
540 
541   // Return from subroutine, with pointer authentication. Using key B [Armv8.3].
542   void retab();
543 
544   // Branch to register, with pointer authentication. Using key A [Armv8.3].
545   void braa(const Register& xn, const Register& xm);
546 
547   // Branch to register, with pointer authentication. Using key B [Armv8.3].
548   void brab(const Register& xn, const Register& xm);
549 
550   // Branch with link to register, with pointer authentication. Using key A
551   // [Armv8.3].
552   void blraa(const Register& xn, const Register& xm);
553 
554   // Branch with link to register, with pointer authentication. Using key B
555   // [Armv8.3].
556   void blrab(const Register& xn, const Register& xm);
557 
558   // Unconditional branch to label.
559   void b(Label* label);
560 
561   // Conditional branch to label.
562   void b(Label* label, Condition cond);
563 
564   // Unconditional branch to PC offset.
565   void b(int64_t imm26);
566 
567   // Conditional branch to PC offset.
568   void b(int64_t imm19, Condition cond);
569 
570   // Branch with link to label.
571   void bl(Label* label);
572 
573   // Branch with link to PC offset.
574   void bl(int64_t imm26);
575 
576   // Compare and branch to label if zero.
577   void cbz(const Register& rt, Label* label);
578 
579   // Compare and branch to PC offset if zero.
580   void cbz(const Register& rt, int64_t imm19);
581 
582   // Compare and branch to label if not zero.
583   void cbnz(const Register& rt, Label* label);
584 
585   // Compare and branch to PC offset if not zero.
586   void cbnz(const Register& rt, int64_t imm19);
587 
588   // Table lookup from one register.
589   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
590 
591   // Table lookup from two registers.
592   void tbl(const VRegister& vd,
593            const VRegister& vn,
594            const VRegister& vn2,
595            const VRegister& vm);
596 
597   // Table lookup from three registers.
598   void tbl(const VRegister& vd,
599            const VRegister& vn,
600            const VRegister& vn2,
601            const VRegister& vn3,
602            const VRegister& vm);
603 
604   // Table lookup from four registers.
605   void tbl(const VRegister& vd,
606            const VRegister& vn,
607            const VRegister& vn2,
608            const VRegister& vn3,
609            const VRegister& vn4,
610            const VRegister& vm);
611 
612   // Table lookup extension from one register.
613   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
614 
615   // Table lookup extension from two registers.
616   void tbx(const VRegister& vd,
617            const VRegister& vn,
618            const VRegister& vn2,
619            const VRegister& vm);
620 
621   // Table lookup extension from three registers.
622   void tbx(const VRegister& vd,
623            const VRegister& vn,
624            const VRegister& vn2,
625            const VRegister& vn3,
626            const VRegister& vm);
627 
628   // Table lookup extension from four registers.
629   void tbx(const VRegister& vd,
630            const VRegister& vn,
631            const VRegister& vn2,
632            const VRegister& vn3,
633            const VRegister& vn4,
634            const VRegister& vm);
635 
636   // Test bit and branch to label if zero.
637   void tbz(const Register& rt, unsigned bit_pos, Label* label);
638 
639   // Test bit and branch to PC offset if zero.
640   void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);
641 
642   // Test bit and branch to label if not zero.
643   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
644 
645   // Test bit and branch to PC offset if not zero.
646   void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);
647 
648   // Address calculation instructions.
649   // Calculate a PC-relative address. Unlike for branches the offset in adr is
650   // unscaled (i.e. the result can be unaligned).
651 
652   // Calculate the address of a label.
653   void adr(const Register& xd, Label* label);
654 
655   // Calculate the address of a PC offset.
656   void adr(const Register& xd, int64_t imm21);
657 
658   // Calculate the page address of a label.
659   void adrp(const Register& xd, Label* label);
660 
661   // Calculate the page address of a PC offset.
662   void adrp(const Register& xd, int64_t imm21);
663 
664   // Data Processing instructions.
665 
666   // Add.
667   void add(const Register& rd, const Register& rn, const Operand& operand);
668 
669   // Add and update status flags.
670   void adds(const Register& rd, const Register& rn, const Operand& operand);
671 
672   // Compare negative.
673   void cmn(const Register& rn, const Operand& operand);
674 
675   // Subtract.
676   void sub(const Register& rd, const Register& rn, const Operand& operand);
677 
678   // Subtract and update status flags.
679   void subs(const Register& rd, const Register& rn, const Operand& operand);
680 
681   // Compare.
682   void cmp(const Register& rn, const Operand& operand);
683 
684   // Negate.
685   void neg(const Register& rd, const Operand& operand);
686 
687   // Negate and update status flags.
688   void negs(const Register& rd, const Operand& operand);
689 
690   // Add with carry bit.
691   void adc(const Register& rd, const Register& rn, const Operand& operand);
692 
693   // Add with carry bit and update status flags.
694   void adcs(const Register& rd, const Register& rn, const Operand& operand);
695 
696   // Subtract with carry bit.
697   void sbc(const Register& rd, const Register& rn, const Operand& operand);
698 
699   // Subtract with carry bit and update status flags.
700   void sbcs(const Register& rd, const Register& rn, const Operand& operand);
701 
702   // Rotate register right and insert into NZCV flags under the control of a
703   // mask [Armv8.4].
704   void rmif(const Register& xn, unsigned rotation, StatusFlags flags);
705 
706   // Set NZCV flags from register, treated as an 8-bit value [Armv8.4].
707   void setf8(const Register& rn);
708 
709   // Set NZCV flags from register, treated as an 16-bit value [Armv8.4].
710   void setf16(const Register& rn);
711 
712   // Negate with carry bit.
713   void ngc(const Register& rd, const Operand& operand);
714 
715   // Negate with carry bit and update status flags.
716   void ngcs(const Register& rd, const Operand& operand);
717 
718   // Logical instructions.
719 
720   // Bitwise and (A & B).
721   void and_(const Register& rd, const Register& rn, const Operand& operand);
722 
723   // Bitwise and (A & B) and update status flags.
724   void ands(const Register& rd, const Register& rn, const Operand& operand);
725 
726   // Bit test and set flags.
727   void tst(const Register& rn, const Operand& operand);
728 
729   // Bit clear (A & ~B).
730   void bic(const Register& rd, const Register& rn, const Operand& operand);
731 
732   // Bit clear (A & ~B) and update status flags.
733   void bics(const Register& rd, const Register& rn, const Operand& operand);
734 
735   // Bitwise or (A | B).
736   void orr(const Register& rd, const Register& rn, const Operand& operand);
737 
738   // Bitwise nor (A | ~B).
739   void orn(const Register& rd, const Register& rn, const Operand& operand);
740 
741   // Bitwise eor/xor (A ^ B).
742   void eor(const Register& rd, const Register& rn, const Operand& operand);
743 
744   // Bitwise enor/xnor (A ^ ~B).
745   void eon(const Register& rd, const Register& rn, const Operand& operand);
746 
747   // Logical shift left by variable.
748   void lslv(const Register& rd, const Register& rn, const Register& rm);
749 
750   // Logical shift right by variable.
751   void lsrv(const Register& rd, const Register& rn, const Register& rm);
752 
753   // Arithmetic shift right by variable.
754   void asrv(const Register& rd, const Register& rn, const Register& rm);
755 
756   // Rotate right by variable.
757   void rorv(const Register& rd, const Register& rn, const Register& rm);
758 
759   // Bitfield instructions.
760 
761   // Bitfield move.
762   void bfm(const Register& rd,
763            const Register& rn,
764            unsigned immr,
765            unsigned imms);
766 
767   // Signed bitfield move.
768   void sbfm(const Register& rd,
769             const Register& rn,
770             unsigned immr,
771             unsigned imms);
772 
773   // Unsigned bitfield move.
774   void ubfm(const Register& rd,
775             const Register& rn,
776             unsigned immr,
777             unsigned imms);
778 
779   // Bfm aliases.
780 
781   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)782   void bfi(const Register& rd,
783            const Register& rn,
784            unsigned lsb,
785            unsigned width) {
786     VIXL_ASSERT(width >= 1);
787     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
788     bfm(rd,
789         rn,
790         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
791         width - 1);
792   }
793 
794   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)795   void bfxil(const Register& rd,
796              const Register& rn,
797              unsigned lsb,
798              unsigned width) {
799     VIXL_ASSERT(width >= 1);
800     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
801     bfm(rd, rn, lsb, lsb + width - 1);
802   }
803 
  // Bitfield clear [Armv8.2]: zero `width` bits of rd starting at bit `lsb`,
  // implemented as a bfi from the appropriate zero register.
  void bfc(const Register& rd, unsigned lsb, unsigned width) {
    bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
  }
808 
809   // Sbfm aliases.
810 
811   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)812   void asr(const Register& rd, const Register& rn, unsigned shift) {
813     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
814     sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
815   }
816 
817   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)818   void sbfiz(const Register& rd,
819              const Register& rn,
820              unsigned lsb,
821              unsigned width) {
822     VIXL_ASSERT(width >= 1);
823     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
824     sbfm(rd,
825          rn,
826          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
827          width - 1);
828   }
829 
830   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)831   void sbfx(const Register& rd,
832             const Register& rn,
833             unsigned lsb,
834             unsigned width) {
835     VIXL_ASSERT(width >= 1);
836     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
837     sbfm(rd, rn, lsb, lsb + width - 1);
838   }
839 
840   // Signed extend byte.
sxtb(const Register & rd,const Register & rn)841   void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }
842 
843   // Signed extend halfword.
sxth(const Register & rd,const Register & rn)844   void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }
845 
846   // Signed extend word.
sxtw(const Register & rd,const Register & rn)847   void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }
848 
849   // Ubfm aliases.
850 
851   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)852   void lsl(const Register& rd, const Register& rn, unsigned shift) {
853     unsigned reg_size = rd.GetSizeInBits();
854     VIXL_ASSERT(shift < reg_size);
855     // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
856     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
857   }
858 
859   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)860   void lsr(const Register& rd, const Register& rn, unsigned shift) {
861     VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
862     ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
863   }
864 
865   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)866   void ubfiz(const Register& rd,
867              const Register& rn,
868              unsigned lsb,
869              unsigned width) {
870     VIXL_ASSERT(width >= 1);
871     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
872     ubfm(rd,
873          rn,
874          (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
875          width - 1);
876   }
877 
878   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)879   void ubfx(const Register& rd,
880             const Register& rn,
881             unsigned lsb,
882             unsigned width) {
883     VIXL_ASSERT(width >= 1);
884     VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
885     ubfm(rd, rn, lsb, lsb + width - 1);
886   }
887 
888   // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)889   void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }
890 
891   // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)892   void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }
893 
894   // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)895   void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }
896 
  // Extract.
  // Extracts a register-width field starting at bit `lsb` of rm from the
  // concatenation <rn:rm> into rd (ror below is this with rn == rm).
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);
  // Rotate right.
  // A rotation is an extract from the concatenation of rs with itself.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }
946 
  // Conditional comparison.

  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);
984 
  // Multiply: rd = rn * rm.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply: rd = -(rn * rm).
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate: rd = ra + rn * rm.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract: rd = ra - rn * rm.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);
  // Unsigned long multiply: 32 x 32 -> 64-bit.
  // Alias of umaddl with the zero register as addend.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }
1025 
  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);
1055 
1056   // Reverse bytes in 64-bit general purpose register, an alias for rev
1057   // [Armv8.2].
rev64(const Register & xd,const Register & xn)1058   void rev64(const Register& xd, const Register& xn) {
1059     VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
1060     rev(xd, xn);
1061   }
1062 
  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits (leading bits that match the sign bit, not
  // counting the sign bit itself).
  void cls(const Register& rd, const Register& rn);
1071 
1072   // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
1073   void pacia(const Register& xd, const Register& rn);
1074 
1075   // Pointer Authentication Code for Instruction address, using key A and a
1076   // modifier of zero [Armv8.3].
1077   void paciza(const Register& xd);
1078 
1079   // Pointer Authentication Code for Instruction address, using key A, with
1080   // address in x17 and modifier in x16 [Armv8.3].
1081   void pacia1716();
1082 
1083   // Pointer Authentication Code for Instruction address, using key A, with
1084   // address in LR and modifier in SP [Armv8.3].
1085   void paciasp();
1086 
1087   // Pointer Authentication Code for Instruction address, using key A, with
1088   // address in LR and a modifier of zero [Armv8.3].
1089   void paciaz();
1090 
1091   // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
1092   void pacib(const Register& xd, const Register& xn);
1093 
1094   // Pointer Authentication Code for Instruction address, using key B and a
1095   // modifier of zero [Armv8.3].
1096   void pacizb(const Register& xd);
1097 
1098   // Pointer Authentication Code for Instruction address, using key B, with
1099   // address in x17 and modifier in x16 [Armv8.3].
1100   void pacib1716();
1101 
1102   // Pointer Authentication Code for Instruction address, using key B, with
1103   // address in LR and modifier in SP [Armv8.3].
1104   void pacibsp();
1105 
1106   // Pointer Authentication Code for Instruction address, using key B, with
1107   // address in LR and a modifier of zero [Armv8.3].
1108   void pacibz();
1109 
1110   // Pointer Authentication Code for Data address, using key A [Armv8.3].
1111   void pacda(const Register& xd, const Register& xn);
1112 
1113   // Pointer Authentication Code for Data address, using key A and a modifier of
1114   // zero [Armv8.3].
1115   void pacdza(const Register& xd);
1116 
1117   // Pointer Authentication Code for Data address, using key B [Armv8.3].
1118   void pacdb(const Register& xd, const Register& xn);
1119 
1120   // Pointer Authentication Code for Data address, using key B and a modifier of
1121   // zero [Armv8.3].
1122   void pacdzb(const Register& xd);
1123 
1124   // Pointer Authentication Code, using Generic key [Armv8.3].
1125   void pacga(const Register& xd, const Register& xn, const Register& xm);
1126 
1127   // Authenticate Instruction address, using key A [Armv8.3].
1128   void autia(const Register& xd, const Register& xn);
1129 
1130   // Authenticate Instruction address, using key A and a modifier of zero
1131   // [Armv8.3].
1132   void autiza(const Register& xd);
1133 
1134   // Authenticate Instruction address, using key A, with address in x17 and
1135   // modifier in x16 [Armv8.3].
1136   void autia1716();
1137 
1138   // Authenticate Instruction address, using key A, with address in LR and
1139   // modifier in SP [Armv8.3].
1140   void autiasp();
1141 
1142   // Authenticate Instruction address, using key A, with address in LR and a
1143   // modifier of zero [Armv8.3].
1144   void autiaz();
1145 
1146   // Authenticate Instruction address, using key B [Armv8.3].
1147   void autib(const Register& xd, const Register& xn);
1148 
1149   // Authenticate Instruction address, using key B and a modifier of zero
1150   // [Armv8.3].
1151   void autizb(const Register& xd);
1152 
1153   // Authenticate Instruction address, using key B, with address in x17 and
1154   // modifier in x16 [Armv8.3].
1155   void autib1716();
1156 
1157   // Authenticate Instruction address, using key B, with address in LR and
1158   // modifier in SP [Armv8.3].
1159   void autibsp();
1160 
1161   // Authenticate Instruction address, using key B, with address in LR and a
1162   // modifier of zero [Armv8.3].
1163   void autibz();
1164 
1165   // Authenticate Data address, using key A [Armv8.3].
1166   void autda(const Register& xd, const Register& xn);
1167 
1168   // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
1169   void autdza(const Register& xd);
1170 
1171   // Authenticate Data address, using key B [Armv8.3].
1172   void autdb(const Register& xd, const Register& xn);
1173 
1174   // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
1175   void autdzb(const Register& xd);
1176 
1177   // Strip Pointer Authentication Code of Data address [Armv8.3].
1178   void xpacd(const Register& xd);
1179 
1180   // Strip Pointer Authentication Code of Instruction address [Armv8.3].
1181   void xpaci(const Register& xd);
1182 
1183   // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
1184   void xpaclri();
1185 
  // Memory instructions.
  // The LoadStoreScalingOption argument states which immediate-offset
  // addressing form (scaled or unscaled) is preferred when either could
  // encode the operand.

  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension (and unscaled offset).
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load double-word with pointer authentication, using data key A and a
  // modifier of zero [Armv8.3].
  void ldraa(const Register& xt, const MemOperand& src);

  // Load double-word with pointer authentication, using data key B and a
  // modifier of zero [Armv8.3].
  void ldrab(const Register& xt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);
1320 
  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Store LORelease byte [Armv8.1].
  void stllrb(const Register& rt, const MemOperand& dst);

  // Store LORelease half-word [Armv8.1].
  void stllrh(const Register& rt, const MemOperand& dst);

  // Store LORelease register [Armv8.1].
  void stllr(const Register& rt, const MemOperand& dst);

  // Load LOAcquire byte [Armv8.1].
  void ldlarb(const Register& rt, const MemOperand& src);

  // Load LOAcquire half-word [Armv8.1].
  void ldlarh(const Register& rt, const MemOperand& src);

  // Load LOAcquire register [Armv8.1].
  void ldlar(const Register& rt, const MemOperand& src);
1410 
  // Compare and Swap word or doubleword in memory [Armv8.1].
  void cas(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1].
  void casa(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Store-release
  // semantics [Armv8.1].
  void casl(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1].
  void casal(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory [Armv8.1].
  void casb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1].
  void casab(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Store-release semantics [Armv8.1].
  void caslb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory [Armv8.1].
  void cash(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire semantics
  // [Armv8.1].
  void casah(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Store-release semantics
  // [Armv8.1].
  void caslh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1].
  void casalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
  void casp(const Register& rs,
            const Register& rs2,
            const Register& rt,
            const Register& rt2,
            const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
  // semantics [Armv8.1].
  void caspa(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with
  // Store-release semantics [Armv8.1].
  void caspl(const Register& rs,
             const Register& rs2,
             const Register& rt,
             const Register& rt2,
             const MemOperand& src);

  // Compare and Swap Pair of words or doublewords in memory, with Load-acquire
  // and Store-release semantics [Armv8.1].
  void caspal(const Register& rs,
              const Register& rs2,
              const Register& rt,
              const Register& rt2,
              const MemOperand& src);

  // Store-release byte (with unscaled offset) [Armv8.4].
  void stlurb(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
  void ldapurb(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
  void ldapursb(const Register& rt, const MemOperand& src);

  // Store-release half-word (with unscaled offset) [Armv8.4].
  void stlurh(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
  void ldapurh(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed half-word (with unscaled offset)
  // [Armv8.4].
  void ldapursh(const Register& rt, const MemOperand& src);

  // Store-release word or double-word (with unscaled offset) [Armv8.4].
  void stlur(const Register& rt, const MemOperand& dst);

  // Load-acquire RCpc Register word or double-word (with unscaled offset)
  // [Armv8.4].
  void ldapur(const Register& rt, const MemOperand& src);

  // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
  void ldapursw(const Register& xt, const MemOperand& src);
1503 
  // Atomic memory operations [Armv8.1]: rs supplies the operand, rt receives
  // the value originally in memory.

  // Atomic add on byte in memory [Armv8.1]
  void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
  void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on byte in memory, with Load-acquire and Store-release semantics
  // [Armv8.1]
  void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory [Armv8.1]
  void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
  void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
  void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory [Armv8.1]
  void ldadd(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldadda(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Store-release semantics
  // [Armv8.1]
  void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic add on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory [Armv8.1]
  void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
  void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
  void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory [Armv8.1]
  void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory [Armv8.1]
  void ldclr(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldclra(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic bit clear on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldclral(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory [Armv8.1]
  void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
  // semantics [Armv8.1]
  void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory [Armv8.1]
  void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
  // [Armv8.1]
  void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Store-release semantics
  // [Armv8.1]
  void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on halfword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
  void ldeor(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
  // semantics [Armv8.1]
  void ldeora(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Store-release
  // semantics [Armv8.1]
  void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);

  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
  // Store-release semantics [Armv8.1]
  void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
1632 
1633   // Atomic bit set on byte in memory [Armv8.1]
1634   void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
1635 
1636   // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
1637   void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
1638 
1639   // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
1640   void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
1641 
1642   // Atomic bit set on byte in memory, with Load-acquire and Store-release
1643   // semantics [Armv8.1]
1644   void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
1645 
1646   // Atomic bit set on halfword in memory [Armv8.1]
1647   void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
1648 
1649   // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
1650   void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
1651 
1652   // Atomic bit set on halfword in memory, with Store-release semantics
1653   // [Armv8.1]
1654   void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
1655 
1656   // Atomic bit set on halfword in memory, with Load-acquire and Store-release
1657   // semantics [Armv8.1]
1658   void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
1659 
1660   // Atomic bit set on word or doubleword in memory [Armv8.1]
1661   void ldset(const Register& rs, const Register& rt, const MemOperand& src);
1662 
1663   // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
1664   // [Armv8.1]
1665   void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
1666 
1667   // Atomic bit set on word or doubleword in memory, with Store-release
1668   // semantics [Armv8.1]
1669   void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
1670 
1671   // Atomic bit set on word or doubleword in memory, with Load-acquire and
1672   // Store-release semantics [Armv8.1]
1673   void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
1674 
1675   // Atomic signed maximum on byte in memory [Armv8.1]
1676   void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
1677 
1678   // Atomic signed maximum on byte in memory, with Load-acquire semantics
1679   // [Armv8.1]
1680   void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
1681 
1682   // Atomic signed maximum on byte in memory, with Store-release semantics
1683   // [Armv8.1]
1684   void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1685 
1686   // Atomic signed maximum on byte in memory, with Load-acquire and
1687   // Store-release semantics [Armv8.1]
1688   void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1689 
1690   // Atomic signed maximum on halfword in memory [Armv8.1]
1691   void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
1692 
1693   // Atomic signed maximum on halfword in memory, with Load-acquire semantics
1694   // [Armv8.1]
1695   void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
1696 
1697   // Atomic signed maximum on halfword in memory, with Store-release semantics
1698   // [Armv8.1]
1699   void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1700 
1701   // Atomic signed maximum on halfword in memory, with Load-acquire and
1702   // Store-release semantics [Armv8.1]
1703   void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1704 
1705   // Atomic signed maximum on word or doubleword in memory [Armv8.1]
1706   void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
1707 
1708   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1709   // semantics [Armv8.1]
1710   void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
1711 
1712   // Atomic signed maximum on word or doubleword in memory, with Store-release
1713   // semantics [Armv8.1]
1714   void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
1715 
1716   // Atomic signed maximum on word or doubleword in memory, with Load-acquire
1717   // and Store-release semantics [Armv8.1]
1718   void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
1719 
1720   // Atomic signed minimum on byte in memory [Armv8.1]
1721   void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
1722 
1723   // Atomic signed minimum on byte in memory, with Load-acquire semantics
1724   // [Armv8.1]
1725   void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
1726 
1727   // Atomic signed minimum on byte in memory, with Store-release semantics
1728   // [Armv8.1]
1729   void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
1730 
1731   // Atomic signed minimum on byte in memory, with Load-acquire and
1732   // Store-release semantics [Armv8.1]
1733   void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
1734 
1735   // Atomic signed minimum on halfword in memory [Armv8.1]
1736   void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
1737 
1738   // Atomic signed minimum on halfword in memory, with Load-acquire semantics
1739   // [Armv8.1]
1740   void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
1741 
1742   // Atomic signed minimum on halfword in memory, with Store-release semantics
1743   // [Armv8.1]
1744   void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
1745 
1746   // Atomic signed minimum on halfword in memory, with Load-acquire and
1747   // Store-release semantics [Armv8.1]
1748   void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
1749 
1750   // Atomic signed minimum on word or doubleword in memory [Armv8.1]
1751   void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
1752 
1753   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1754   // semantics [Armv8.1]
1755   void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
1756 
1757   // Atomic signed minimum on word or doubleword in memory, with Store-release
1758   // semantics [Armv8.1]
1759   void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
1760 
1761   // Atomic signed minimum on word or doubleword in memory, with Load-acquire
1762   // and Store-release semantics [Armv8.1]
1763   void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
1764 
1765   // Atomic unsigned maximum on byte in memory [Armv8.1]
1766   void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
1767 
1768   // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
1769   // [Armv8.1]
1770   void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
1771 
1772   // Atomic unsigned maximum on byte in memory, with Store-release semantics
1773   // [Armv8.1]
1774   void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
1775 
1776   // Atomic unsigned maximum on byte in memory, with Load-acquire and
1777   // Store-release semantics [Armv8.1]
1778   void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
1779 
1780   // Atomic unsigned maximum on halfword in memory [Armv8.1]
1781   void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
1782 
1783   // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
1784   // [Armv8.1]
1785   void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
1786 
1787   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1788   // [Armv8.1]
1789   void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
1790 
1791   // Atomic unsigned maximum on halfword in memory, with Load-acquire and
1792   // Store-release semantics [Armv8.1]
1793   void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
1794 
1795   // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
1796   void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
1797 
1798   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1799   // semantics [Armv8.1]
1800   void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
1801 
1802   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
1803   // semantics [Armv8.1]
1804   void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
1805 
1806   // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
1807   // and Store-release semantics [Armv8.1]
1808   void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
1809 
1810   // Atomic unsigned minimum on byte in memory [Armv8.1]
1811   void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
1812 
1813   // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
1814   // [Armv8.1]
1815   void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
1816 
1817   // Atomic unsigned minimum on byte in memory, with Store-release semantics
1818   // [Armv8.1]
1819   void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
1820 
1821   // Atomic unsigned minimum on byte in memory, with Load-acquire and
1822   // Store-release semantics [Armv8.1]
1823   void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
1824 
1825   // Atomic unsigned minimum on halfword in memory [Armv8.1]
1826   void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
1827 
1828   // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
1829   // [Armv8.1]
1830   void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
1831 
1832   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
1833   // [Armv8.1]
1834   void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
1835 
1836   // Atomic unsigned minimum on halfword in memory, with Load-acquire and
1837   // Store-release semantics [Armv8.1]
1838   void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
1839 
1840   // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
1841   void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
1842 
1843   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1844   // semantics [Armv8.1]
1845   void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
1846 
1847   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
1848   // semantics [Armv8.1]
1849   void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
1850 
1851   // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
1852   // and Store-release semantics [Armv8.1]
1853   void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
1854 
1855   // Atomic add on byte in memory, without return. [Armv8.1]
1856   void staddb(const Register& rs, const MemOperand& src);
1857 
1858   // Atomic add on byte in memory, with Store-release semantics and without
1859   // return. [Armv8.1]
1860   void staddlb(const Register& rs, const MemOperand& src);
1861 
1862   // Atomic add on halfword in memory, without return. [Armv8.1]
1863   void staddh(const Register& rs, const MemOperand& src);
1864 
1865   // Atomic add on halfword in memory, with Store-release semantics and without
1866   // return. [Armv8.1]
1867   void staddlh(const Register& rs, const MemOperand& src);
1868 
1869   // Atomic add on word or doubleword in memory, without return. [Armv8.1]
1870   void stadd(const Register& rs, const MemOperand& src);
1871 
1872   // Atomic add on word or doubleword in memory, with Store-release semantics
1873   // and without return. [Armv8.1]
1874   void staddl(const Register& rs, const MemOperand& src);
1875 
1876   // Atomic bit clear on byte in memory, without return. [Armv8.1]
1877   void stclrb(const Register& rs, const MemOperand& src);
1878 
1879   // Atomic bit clear on byte in memory, with Store-release semantics and
1880   // without return. [Armv8.1]
1881   void stclrlb(const Register& rs, const MemOperand& src);
1882 
1883   // Atomic bit clear on halfword in memory, without return. [Armv8.1]
1884   void stclrh(const Register& rs, const MemOperand& src);
1885 
1886   // Atomic bit clear on halfword in memory, with Store-release semantics and
1887   // without return. [Armv8.1]
1888   void stclrlh(const Register& rs, const MemOperand& src);
1889 
1890   // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
1891   void stclr(const Register& rs, const MemOperand& src);
1892 
1893   // Atomic bit clear on word or doubleword in memory, with Store-release
1894   // semantics and without return. [Armv8.1]
1895   void stclrl(const Register& rs, const MemOperand& src);
1896 
1897   // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
1898   void steorb(const Register& rs, const MemOperand& src);
1899 
1900   // Atomic exclusive OR on byte in memory, with Store-release semantics and
1901   // without return. [Armv8.1]
1902   void steorlb(const Register& rs, const MemOperand& src);
1903 
1904   // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
1905   void steorh(const Register& rs, const MemOperand& src);
1906 
1907   // Atomic exclusive OR on halfword in memory, with Store-release semantics
1908   // and without return. [Armv8.1]
1909   void steorlh(const Register& rs, const MemOperand& src);
1910 
1911   // Atomic exclusive OR on word or doubleword in memory, without return.
1912   // [Armv8.1]
1913   void steor(const Register& rs, const MemOperand& src);
1914 
1915   // Atomic exclusive OR on word or doubleword in memory, with Store-release
1916   // semantics and without return. [Armv8.1]
1917   void steorl(const Register& rs, const MemOperand& src);
1918 
1919   // Atomic bit set on byte in memory, without return. [Armv8.1]
1920   void stsetb(const Register& rs, const MemOperand& src);
1921 
1922   // Atomic bit set on byte in memory, with Store-release semantics and without
1923   // return. [Armv8.1]
1924   void stsetlb(const Register& rs, const MemOperand& src);
1925 
1926   // Atomic bit set on halfword in memory, without return. [Armv8.1]
1927   void stseth(const Register& rs, const MemOperand& src);
1928 
1929   // Atomic bit set on halfword in memory, with Store-release semantics and
1930   // without return. [Armv8.1]
1931   void stsetlh(const Register& rs, const MemOperand& src);
1932 
1933   // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
1934   void stset(const Register& rs, const MemOperand& src);
1935 
1936   // Atomic bit set on word or doubleword in memory, with Store-release
1937   // semantics and without return. [Armv8.1]
1938   void stsetl(const Register& rs, const MemOperand& src);
1939 
1940   // Atomic signed maximum on byte in memory, without return. [Armv8.1]
1941   void stsmaxb(const Register& rs, const MemOperand& src);
1942 
1943   // Atomic signed maximum on byte in memory, with Store-release semantics and
1944   // without return. [Armv8.1]
1945   void stsmaxlb(const Register& rs, const MemOperand& src);
1946 
1947   // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
1948   void stsmaxh(const Register& rs, const MemOperand& src);
1949 
1950   // Atomic signed maximum on halfword in memory, with Store-release semantics
1951   // and without return. [Armv8.1]
1952   void stsmaxlh(const Register& rs, const MemOperand& src);
1953 
1954   // Atomic signed maximum on word or doubleword in memory, without return.
1955   // [Armv8.1]
1956   void stsmax(const Register& rs, const MemOperand& src);
1957 
1958   // Atomic signed maximum on word or doubleword in memory, with Store-release
1959   // semantics and without return. [Armv8.1]
1960   void stsmaxl(const Register& rs, const MemOperand& src);
1961 
1962   // Atomic signed minimum on byte in memory, without return. [Armv8.1]
1963   void stsminb(const Register& rs, const MemOperand& src);
1964 
1965   // Atomic signed minimum on byte in memory, with Store-release semantics and
1966   // without return. [Armv8.1]
1967   void stsminlb(const Register& rs, const MemOperand& src);
1968 
1969   // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
1970   void stsminh(const Register& rs, const MemOperand& src);
1971 
1972   // Atomic signed minimum on halfword in memory, with Store-release semantics
1973   // and without return. [Armv8.1]
1974   void stsminlh(const Register& rs, const MemOperand& src);
1975 
1976   // Atomic signed minimum on word or doubleword in memory, without return.
1977   // [Armv8.1]
1978   void stsmin(const Register& rs, const MemOperand& src);
1979 
1980   // Atomic signed minimum on word or doubleword in memory, with Store-release
  // semantics and without return. [Armv8.1]
1982   void stsminl(const Register& rs, const MemOperand& src);
1983 
1984   // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
1985   void stumaxb(const Register& rs, const MemOperand& src);
1986 
1987   // Atomic unsigned maximum on byte in memory, with Store-release semantics and
1988   // without return. [Armv8.1]
1989   void stumaxlb(const Register& rs, const MemOperand& src);
1990 
1991   // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
1992   void stumaxh(const Register& rs, const MemOperand& src);
1993 
1994   // Atomic unsigned maximum on halfword in memory, with Store-release semantics
1995   // and without return. [Armv8.1]
1996   void stumaxlh(const Register& rs, const MemOperand& src);
1997 
1998   // Atomic unsigned maximum on word or doubleword in memory, without return.
1999   // [Armv8.1]
2000   void stumax(const Register& rs, const MemOperand& src);
2001 
2002   // Atomic unsigned maximum on word or doubleword in memory, with Store-release
2003   // semantics and without return. [Armv8.1]
2004   void stumaxl(const Register& rs, const MemOperand& src);
2005 
2006   // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
2007   void stuminb(const Register& rs, const MemOperand& src);
2008 
2009   // Atomic unsigned minimum on byte in memory, with Store-release semantics and
2010   // without return. [Armv8.1]
2011   void stuminlb(const Register& rs, const MemOperand& src);
2012 
2013   // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
2014   void stuminh(const Register& rs, const MemOperand& src);
2015 
2016   // Atomic unsigned minimum on halfword in memory, with Store-release semantics
2017   // and without return. [Armv8.1]
2018   void stuminlh(const Register& rs, const MemOperand& src);
2019 
2020   // Atomic unsigned minimum on word or doubleword in memory, without return.
2021   // [Armv8.1]
2022   void stumin(const Register& rs, const MemOperand& src);
2023 
2024   // Atomic unsigned minimum on word or doubleword in memory, with Store-release
2025   // semantics and without return. [Armv8.1]
2026   void stuminl(const Register& rs, const MemOperand& src);
2027 
2028   // Swap byte in memory [Armv8.1]
2029   void swpb(const Register& rs, const Register& rt, const MemOperand& src);
2030 
2031   // Swap byte in memory, with Load-acquire semantics [Armv8.1]
2032   void swpab(const Register& rs, const Register& rt, const MemOperand& src);
2033 
2034   // Swap byte in memory, with Store-release semantics [Armv8.1]
2035   void swplb(const Register& rs, const Register& rt, const MemOperand& src);
2036 
2037   // Swap byte in memory, with Load-acquire and Store-release semantics
2038   // [Armv8.1]
2039   void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
2040 
2041   // Swap halfword in memory [Armv8.1]
2042   void swph(const Register& rs, const Register& rt, const MemOperand& src);
2043 
2044   // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
2045   void swpah(const Register& rs, const Register& rt, const MemOperand& src);
2046 
2047   // Swap halfword in memory, with Store-release semantics [Armv8.1]
2048   void swplh(const Register& rs, const Register& rt, const MemOperand& src);
2049 
2050   // Swap halfword in memory, with Load-acquire and Store-release semantics
2051   // [Armv8.1]
2052   void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
2053 
2054   // Swap word or doubleword in memory [Armv8.1]
2055   void swp(const Register& rs, const Register& rt, const MemOperand& src);
2056 
2057   // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
2058   void swpa(const Register& rs, const Register& rt, const MemOperand& src);
2059 
2060   // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
2061   void swpl(const Register& rs, const Register& rt, const MemOperand& src);
2062 
2063   // Swap word or doubleword in memory, with Load-acquire and Store-release
2064   // semantics [Armv8.1]
2065   void swpal(const Register& rs, const Register& rt, const MemOperand& src);
2066 
2067   // Load-Acquire RCpc Register byte [Armv8.3]
2068   void ldaprb(const Register& rt, const MemOperand& src);
2069 
2070   // Load-Acquire RCpc Register halfword [Armv8.3]
2071   void ldaprh(const Register& rt, const MemOperand& src);
2072 
2073   // Load-Acquire RCpc Register word or doubleword [Armv8.3]
2074   void ldapr(const Register& rt, const MemOperand& src);
2075 
2076   // Prefetch memory.
2077   void prfm(PrefetchOperation op,
2078             const MemOperand& addr,
2079             LoadStoreScalingOption option = PreferScaledOffset);
2080 
2081   // Prefetch memory (with unscaled offset).
2082   void prfum(PrefetchOperation op,
2083              const MemOperand& addr,
2084              LoadStoreScalingOption option = PreferUnscaledOffset);
2085 
2086   // Prefetch memory in the literal pool.
2087   void prfm(PrefetchOperation op, RawLiteral* literal);
2088 
2089   // Prefetch from pc + imm19 << 2.
2090   void prfm(PrefetchOperation op, int64_t imm19);
2091 
2092   // Prefetch memory (allowing unallocated hints).
2093   void prfm(int op,
2094             const MemOperand& addr,
2095             LoadStoreScalingOption option = PreferScaledOffset);
2096 
2097   // Prefetch memory (with unscaled offset, allowing unallocated hints).
2098   void prfum(int op,
2099              const MemOperand& addr,
2100              LoadStoreScalingOption option = PreferUnscaledOffset);
2101 
2102   // Prefetch memory in the literal pool (allowing unallocated hints).
2103   void prfm(int op, RawLiteral* literal);
2104 
2105   // Prefetch from pc + imm19 << 2 (allowing unallocated hints).
2106   void prfm(int op, int64_t imm19);
2107 
2108   // Move instructions. The default shift of -1 indicates that the move
2109   // instruction will calculate an appropriate 16-bit immediate and left shift
2110   // that is equal to the 64-bit immediate argument. If an explicit left shift
2111   // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
2112   //
2113   // For movk, an explicit shift can be used to indicate which half word should
2114   // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
2115   // half word with zero, whereas movk(x0, 0, 48) will overwrite the
2116   // most-significant.
2117 
2118   // Move immediate and keep.
2119   void movk(const Register& rd, uint64_t imm, int shift = -1) {
2120     MoveWide(rd, imm, shift, MOVK);
2121   }
2122 
2123   // Move inverted immediate.
2124   void movn(const Register& rd, uint64_t imm, int shift = -1) {
2125     MoveWide(rd, imm, shift, MOVN);
2126   }
2127 
2128   // Move immediate.
2129   void movz(const Register& rd, uint64_t imm, int shift = -1) {
2130     MoveWide(rd, imm, shift, MOVZ);
2131   }
2132 
2133   // Move immediate, aliases for movz, movn, orr.
mov(const Register & rd,uint64_t imm)2134   void mov(const Register& rd, uint64_t imm) {
2135     if (!OneInstrMoveImmediateHelper(this, rd, imm)) {
2136       VIXL_UNIMPLEMENTED();
2137     }
2138   }
2139 
2140   // Misc instructions.
2141 
2142   // Monitor debug-mode breakpoint.
2143   void brk(int code);
2144 
2145   // Halting debug-mode breakpoint.
2146   void hlt(int code);
2147 
2148   // Generate exception targeting EL1.
2149   void svc(int code);
2150 
2151   // Generate undefined instruction exception.
2152   void udf(int code);
2153 
2154   // Move register to register.
2155   void mov(const Register& rd, const Register& rn);
2156 
2157   // Move inverted operand to register.
2158   void mvn(const Register& rd, const Operand& operand);
2159 
2160   // System instructions.
2161 
2162   // Move to register from system register.
2163   void mrs(const Register& xt, SystemRegister sysreg);
2164 
2165   // Move from register to system register.
2166   void msr(SystemRegister sysreg, const Register& xt);
2167 
2168   // Invert carry flag [Armv8.4].
2169   void cfinv();
2170 
2171   // Convert floating-point condition flags from alternative format to Arm
2172   // format [Armv8.5].
2173   void xaflag();
2174 
2175   // Convert floating-point condition flags from Arm format to alternative
2176   // format [Armv8.5].
2177   void axflag();
2178 
2179   // System instruction.
2180   void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);
2181 
2182   // System instruction with pre-encoded op (op1:crn:crm:op2).
2183   void sys(int op, const Register& xt = xzr);
2184 
2185   // System data cache operation.
2186   void dc(DataCacheOp op, const Register& rt);
2187 
2188   // System instruction cache operation.
2189   void ic(InstructionCacheOp op, const Register& rt);
2190 
2191   // System hint (named type).
2192   void hint(SystemHint code);
2193 
2194   // System hint (numbered type).
2195   void hint(int imm7);
2196 
2197   // Clear exclusive monitor.
2198   void clrex(int imm4 = 0xf);
2199 
2200   // Data memory barrier.
2201   void dmb(BarrierDomain domain, BarrierType type);
2202 
2203   // Data synchronization barrier.
2204   void dsb(BarrierDomain domain, BarrierType type);
2205 
2206   // Instruction synchronization barrier.
2207   void isb();
2208 
2209   // Error synchronization barrier.
2210   void esb();
2211 
2212   // Conditional speculation dependency barrier.
2213   void csdb();
2214 
2215   // No-op.
nop()2216   void nop() { hint(NOP); }
2217 
2218   // Branch target identification.
2219   void bti(BranchTargetIdentifier id);
2220 
2221   // FP and NEON instructions.
2222 
2223   // Move double precision immediate to FP register.
2224   void fmov(const VRegister& vd, double imm);
2225 
2226   // Move single precision immediate to FP register.
2227   void fmov(const VRegister& vd, float imm);
2228 
2229   // Move half precision immediate to FP register [Armv8.2].
2230   void fmov(const VRegister& vd, Float16 imm);
2231 
2232   // Move FP register to register.
2233   void fmov(const Register& rd, const VRegister& fn);
2234 
2235   // Move register to FP register.
2236   void fmov(const VRegister& vd, const Register& rn);
2237 
2238   // Move FP register to FP register.
2239   void fmov(const VRegister& vd, const VRegister& fn);
2240 
2241   // Move 64-bit register to top half of 128-bit FP register.
2242   void fmov(const VRegister& vd, int index, const Register& rn);
2243 
2244   // Move top half of 128-bit FP register to 64-bit register.
2245   void fmov(const Register& rd, const VRegister& vn, int index);
2246 
2247   // FP add.
2248   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2249 
2250   // FP subtract.
2251   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2252 
2253   // FP multiply.
2254   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2255 
2256   // FP fused multiply-add.
2257   void fmadd(const VRegister& vd,
2258              const VRegister& vn,
2259              const VRegister& vm,
2260              const VRegister& va);
2261 
2262   // FP fused multiply-subtract.
2263   void fmsub(const VRegister& vd,
2264              const VRegister& vn,
2265              const VRegister& vm,
2266              const VRegister& va);
2267 
2268   // FP fused multiply-add and negate.
2269   void fnmadd(const VRegister& vd,
2270               const VRegister& vn,
2271               const VRegister& vm,
2272               const VRegister& va);
2273 
2274   // FP fused multiply-subtract and negate.
2275   void fnmsub(const VRegister& vd,
2276               const VRegister& vn,
2277               const VRegister& vm,
2278               const VRegister& va);
2279 
2280   // FP multiply-negate scalar.
2281   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2282 
2283   // FP reciprocal exponent scalar.
2284   void frecpx(const VRegister& vd, const VRegister& vn);
2285 
2286   // FP divide.
2287   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2288 
2289   // FP maximum.
2290   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2291 
2292   // FP minimum.
2293   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2294 
2295   // FP maximum number.
2296   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2297 
2298   // FP minimum number.
2299   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
2300 
2301   // FP absolute.
2302   void fabs(const VRegister& vd, const VRegister& vn);
2303 
2304   // FP negate.
2305   void fneg(const VRegister& vd, const VRegister& vn);
2306 
2307   // FP square root.
2308   void fsqrt(const VRegister& vd, const VRegister& vn);
2309 
2310   // FP round to integer, nearest with ties to away.
2311   void frinta(const VRegister& vd, const VRegister& vn);
2312 
2313   // FP round to integer, implicit rounding.
2314   void frinti(const VRegister& vd, const VRegister& vn);
2315 
2316   // FP round to integer, toward minus infinity.
2317   void frintm(const VRegister& vd, const VRegister& vn);
2318 
2319   // FP round to integer, nearest with ties to even.
2320   void frintn(const VRegister& vd, const VRegister& vn);
2321 
2322   // FP round to integer, toward plus infinity.
2323   void frintp(const VRegister& vd, const VRegister& vn);
2324 
2325   // FP round to integer, exact, implicit rounding.
2326   void frintx(const VRegister& vd, const VRegister& vn);
2327 
2328   // FP round to integer, towards zero.
2329   void frintz(const VRegister& vd, const VRegister& vn);
2330 
2331   // FP round to 32-bit integer, exact, implicit rounding [Armv8.5].
2332   void frint32x(const VRegister& vd, const VRegister& vn);
2333 
2334   // FP round to 32-bit integer, towards zero [Armv8.5].
2335   void frint32z(const VRegister& vd, const VRegister& vn);
2336 
2337   // FP round to 64-bit integer, exact, implicit rounding [Armv8.5].
2338   void frint64x(const VRegister& vd, const VRegister& vn);
2339 
2340   // FP round to 64-bit integer, towards zero [Armv8.5].
2341   void frint64z(const VRegister& vd, const VRegister& vn);
2342 
  // Helper for the FP compare-with-immediate instructions; `trap` selects
  // between the quiet and signaling forms — presumably shared by the
  // fcmp/fcmpe overloads below (confirm against the implementation).
  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  // Helper for the FP register-register compare instructions; `trap`
  // selects between the quiet and signaling forms.
  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  // Helper for the FP conditional compare instructions; `trap` selects
  // between the quiet (fccmp) and signaling (fccmpe) forms.
  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);

  // FP conditional compare.
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);
2384 
2385   // Common FP Convert functions.
2386   void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2387   void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2388   void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2389 
2390   // FP convert between precisions.
2391   void fcvt(const VRegister& vd, const VRegister& vn);
2392 
2393   // FP convert to higher precision.
2394   void fcvtl(const VRegister& vd, const VRegister& vn);
2395 
2396   // FP convert to higher precision (second part).
2397   void fcvtl2(const VRegister& vd, const VRegister& vn);
2398 
2399   // FP convert to lower precision.
2400   void fcvtn(const VRegister& vd, const VRegister& vn);
2401 
2402   // FP convert to lower prevision (second part).
2403   void fcvtn2(const VRegister& vd, const VRegister& vn);
2404 
2405   // FP convert to lower precision, rounding to odd.
2406   void fcvtxn(const VRegister& vd, const VRegister& vn);
2407 
2408   // FP convert to lower precision, rounding to odd (second part).
2409   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2410 
2411   // FP convert to signed integer, nearest with ties to away.
2412   void fcvtas(const Register& rd, const VRegister& vn);
2413 
2414   // FP convert to unsigned integer, nearest with ties to away.
2415   void fcvtau(const Register& rd, const VRegister& vn);
2416 
2417   // FP convert to signed integer, nearest with ties to away.
2418   void fcvtas(const VRegister& vd, const VRegister& vn);
2419 
2420   // FP convert to unsigned integer, nearest with ties to away.
2421   void fcvtau(const VRegister& vd, const VRegister& vn);
2422 
2423   // FP convert to signed integer, round towards -infinity.
2424   void fcvtms(const Register& rd, const VRegister& vn);
2425 
2426   // FP convert to unsigned integer, round towards -infinity.
2427   void fcvtmu(const Register& rd, const VRegister& vn);
2428 
2429   // FP convert to signed integer, round towards -infinity.
2430   void fcvtms(const VRegister& vd, const VRegister& vn);
2431 
2432   // FP convert to unsigned integer, round towards -infinity.
2433   void fcvtmu(const VRegister& vd, const VRegister& vn);
2434 
2435   // FP convert to signed integer, nearest with ties to even.
2436   void fcvtns(const Register& rd, const VRegister& vn);
2437 
2438   // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2439   void fjcvtzs(const Register& rd, const VRegister& vn);
2440 
2441   // FP convert to unsigned integer, nearest with ties to even.
2442   void fcvtnu(const Register& rd, const VRegister& vn);
2443 
2444   // FP convert to signed integer, nearest with ties to even.
2445   void fcvtns(const VRegister& rd, const VRegister& vn);
2446 
2447   // FP convert to unsigned integer, nearest with ties to even.
2448   void fcvtnu(const VRegister& rd, const VRegister& vn);
2449 
2450   // FP convert to signed integer or fixed-point, round towards zero.
2451   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2452 
2453   // FP convert to unsigned integer or fixed-point, round towards zero.
2454   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2455 
2456   // FP convert to signed integer or fixed-point, round towards zero.
2457   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2458 
2459   // FP convert to unsigned integer or fixed-point, round towards zero.
2460   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2461 
2462   // FP convert to signed integer, round towards +infinity.
2463   void fcvtps(const Register& rd, const VRegister& vn);
2464 
2465   // FP convert to unsigned integer, round towards +infinity.
2466   void fcvtpu(const Register& rd, const VRegister& vn);
2467 
2468   // FP convert to signed integer, round towards +infinity.
2469   void fcvtps(const VRegister& vd, const VRegister& vn);
2470 
2471   // FP convert to unsigned integer, round towards +infinity.
2472   void fcvtpu(const VRegister& vd, const VRegister& vn);
2473 
2474   // Convert signed integer or fixed point to FP.
2475   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2476 
2477   // Convert unsigned integer or fixed point to FP.
2478   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2479 
2480   // Convert signed integer or fixed-point to FP.
2481   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2482 
2483   // Convert unsigned integer or fixed-point to FP.
2484   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2485 
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving sub.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving sub.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements scalar.
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // The comments on the following overloads say "zero" while the operand is
  // an `int value`; the instruction encodings only compare against zero, so
  // presumably `value` must be 0 — NOTE(review): confirm in the definitions.

  // Compare equal to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);
2707 
  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  // (Named `and_` because `and` is a reserved alternative token in C++.)
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate (imm8, optionally left-shifted).
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise or with inverted operand (orn).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise exclusive or (eor).
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate (imm8, optionally left-shifted).
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  // (Named `not_` because `not` is a reserved alternative token in C++.)
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);
2827 
  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar (architectural alias of dup).
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element (architectural alias of
  // ins).
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element (architectural alias
  // of ins).
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register (architectural alias of
  // umov).
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);

  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);
2947 
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
3073 
3074   // One-element structure store from one register.
3075   void st1(const VRegister& vt, const MemOperand& src);
3076 
3077   // One-element structure store from two registers.
3078   void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3079 
3080   // One-element structure store from three registers.
3081   void st1(const VRegister& vt,
3082            const VRegister& vt2,
3083            const VRegister& vt3,
3084            const MemOperand& src);
3085 
3086   // One-element structure store from four registers.
3087   void st1(const VRegister& vt,
3088            const VRegister& vt2,
3089            const VRegister& vt3,
3090            const VRegister& vt4,
3091            const MemOperand& src);
3092 
3093   // One-element single structure store from one lane.
3094   void st1(const VRegister& vt, int lane, const MemOperand& src);
3095 
3096   // Two-element structure store from two registers.
3097   void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
3098 
3099   // Two-element single structure store from two lanes.
3100   void st2(const VRegister& vt,
3101            const VRegister& vt2,
3102            int lane,
3103            const MemOperand& src);
3104 
3105   // Three-element structure store from three registers.
3106   void st3(const VRegister& vt,
3107            const VRegister& vt2,
3108            const VRegister& vt3,
3109            const MemOperand& src);
3110 
3111   // Three-element single structure store from three lanes.
3112   void st3(const VRegister& vt,
3113            const VRegister& vt2,
3114            const VRegister& vt3,
3115            int lane,
3116            const MemOperand& src);
3117 
3118   // Four-element structure store from four registers.
3119   void st4(const VRegister& vt,
3120            const VRegister& vt2,
3121            const VRegister& vt3,
3122            const VRegister& vt4,
3123            const MemOperand& src);
3124 
3125   // Four-element single structure store from four lanes.
3126   void st4(const VRegister& vt,
3127            const VRegister& vt2,
3128            const VRegister& vt3,
3129            const VRegister& vt4,
3130            int lane,
3131            const MemOperand& src);
3132 
3133   // Unsigned add long.
3134   void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3135 
3136   // Unsigned add long (second part).
3137   void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3138 
3139   // Unsigned add wide.
3140   void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3141 
3142   // Unsigned add wide (second part).
3143   void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3144 
3145   // Signed add long.
3146   void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3147 
3148   // Signed add long (second part).
3149   void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3150 
3151   // Signed add wide.
3152   void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3153 
3154   // Signed add wide (second part).
3155   void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3156 
3157   // Unsigned subtract long.
3158   void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3159 
3160   // Unsigned subtract long (second part).
3161   void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3162 
3163   // Unsigned subtract wide.
3164   void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3165 
3166   // Unsigned subtract wide (second part).
3167   void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3168 
3169   // Signed subtract long.
3170   void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3171 
3172   // Signed subtract long (second part).
3173   void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3174 
3175   // Signed integer subtract wide.
3176   void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3177 
3178   // Signed integer subtract wide (second part).
3179   void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3180 
3181   // Unsigned maximum.
3182   void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3183 
3184   // Unsigned pairwise maximum.
3185   void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3186 
3187   // Unsigned maximum across vector.
3188   void umaxv(const VRegister& vd, const VRegister& vn);
3189 
3190   // Unsigned minimum.
3191   void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3192 
3193   // Unsigned pairwise minimum.
3194   void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3195 
3196   // Unsigned minimum across vector.
3197   void uminv(const VRegister& vd, const VRegister& vn);
3198 
3199   // Transpose vectors (primary).
3200   void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3201 
3202   // Transpose vectors (secondary).
3203   void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3204 
3205   // Unzip vectors (primary).
3206   void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3207 
3208   // Unzip vectors (secondary).
3209   void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3210 
3211   // Zip vectors (primary).
3212   void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3213 
3214   // Zip vectors (secondary).
3215   void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3216 
3217   // Signed shift right by immediate.
3218   void sshr(const VRegister& vd, const VRegister& vn, int shift);
3219 
3220   // Unsigned shift right by immediate.
3221   void ushr(const VRegister& vd, const VRegister& vn, int shift);
3222 
3223   // Signed rounding shift right by immediate.
3224   void srshr(const VRegister& vd, const VRegister& vn, int shift);
3225 
3226   // Unsigned rounding shift right by immediate.
3227   void urshr(const VRegister& vd, const VRegister& vn, int shift);
3228 
3229   // Signed shift right by immediate and accumulate.
3230   void ssra(const VRegister& vd, const VRegister& vn, int shift);
3231 
3232   // Unsigned shift right by immediate and accumulate.
3233   void usra(const VRegister& vd, const VRegister& vn, int shift);
3234 
3235   // Signed rounding shift right by immediate and accumulate.
3236   void srsra(const VRegister& vd, const VRegister& vn, int shift);
3237 
3238   // Unsigned rounding shift right by immediate and accumulate.
3239   void ursra(const VRegister& vd, const VRegister& vn, int shift);
3240 
3241   // Shift right narrow by immediate.
3242   void shrn(const VRegister& vd, const VRegister& vn, int shift);
3243 
3244   // Shift right narrow by immediate (second part).
3245   void shrn2(const VRegister& vd, const VRegister& vn, int shift);
3246 
3247   // Rounding shift right narrow by immediate.
3248   void rshrn(const VRegister& vd, const VRegister& vn, int shift);
3249 
3250   // Rounding shift right narrow by immediate (second part).
3251   void rshrn2(const VRegister& vd, const VRegister& vn, int shift);
3252 
3253   // Unsigned saturating shift right narrow by immediate.
3254   void uqshrn(const VRegister& vd, const VRegister& vn, int shift);
3255 
3256   // Unsigned saturating shift right narrow by immediate (second part).
3257   void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3258 
3259   // Unsigned saturating rounding shift right narrow by immediate.
3260   void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3261 
3262   // Unsigned saturating rounding shift right narrow by immediate (second part).
3263   void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3264 
3265   // Signed saturating shift right narrow by immediate.
3266   void sqshrn(const VRegister& vd, const VRegister& vn, int shift);
3267 
3268   // Signed saturating shift right narrow by immediate (second part).
3269   void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);
3270 
3271   // Signed saturating rounded shift right narrow by immediate.
3272   void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);
3273 
3274   // Signed saturating rounded shift right narrow by immediate (second part).
3275   void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);
3276 
3277   // Signed saturating shift right unsigned narrow by immediate.
3278   void sqshrun(const VRegister& vd, const VRegister& vn, int shift);
3279 
3280   // Signed saturating shift right unsigned narrow by immediate (second part).
3281   void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);
3282 
  // Signed saturating rounding shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);
3285 
  // Signed saturating rounding shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
3288 
3289   // FP reciprocal step.
3290   void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3291 
3292   // FP reciprocal estimate.
3293   void frecpe(const VRegister& vd, const VRegister& vn);
3294 
3295   // FP reciprocal square root estimate.
3296   void frsqrte(const VRegister& vd, const VRegister& vn);
3297 
3298   // FP reciprocal square root step.
3299   void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3300 
3301   // Signed absolute difference and accumulate long.
3302   void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3303 
3304   // Signed absolute difference and accumulate long (second part).
3305   void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3306 
3307   // Unsigned absolute difference and accumulate long.
3308   void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3309 
3310   // Unsigned absolute difference and accumulate long (second part).
3311   void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3312 
3313   // Signed absolute difference long.
3314   void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3315 
3316   // Signed absolute difference long (second part).
3317   void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3318 
3319   // Unsigned absolute difference long.
3320   void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3321 
3322   // Unsigned absolute difference long (second part).
3323   void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3324 
3325   // Polynomial multiply long.
3326   void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3327 
3328   // Polynomial multiply long (second part).
3329   void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3330 
3331   // Signed long multiply-add.
3332   void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3333 
3334   // Signed long multiply-add (second part).
3335   void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3336 
3337   // Unsigned long multiply-add.
3338   void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3339 
3340   // Unsigned long multiply-add (second part).
3341   void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3342 
3343   // Signed long multiply-sub.
3344   void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3345 
3346   // Signed long multiply-sub (second part).
3347   void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3348 
3349   // Unsigned long multiply-sub.
3350   void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3351 
3352   // Unsigned long multiply-sub (second part).
3353   void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3354 
3355   // Signed long multiply.
3356   void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3357 
3358   // Signed long multiply (second part).
3359   void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3360 
3361   // Signed saturating doubling long multiply-add.
3362   void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3363 
3364   // Signed saturating doubling long multiply-add (second part).
3365   void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3366 
3367   // Signed saturating doubling long multiply-subtract.
3368   void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3369 
3370   // Signed saturating doubling long multiply-subtract (second part).
3371   void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3372 
3373   // Signed saturating doubling long multiply.
3374   void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3375 
3376   // Signed saturating doubling long multiply (second part).
3377   void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3378 
3379   // Signed saturating doubling multiply returning high half.
3380   void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3381 
3382   // Signed saturating rounding doubling multiply returning high half.
3383   void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3384 
3385   // Signed dot product [Armv8.2].
3386   void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3387 
3388   // Signed saturating rounding doubling multiply accumulate returning high
3389   // half [Armv8.1].
3390   void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3391 
3392   // Unsigned dot product [Armv8.2].
3393   void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3394 
  // Dot product with unsigned and signed integers (vector).
  void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3397 
3398   // Dot product with signed and unsigned integers (vector, by element).
3399   void sudot(const VRegister& vd,
3400              const VRegister& vn,
3401              const VRegister& vm,
3402              int vm_index);
3403 
3404   // Dot product with unsigned and signed integers (vector, by element).
3405   void usdot(const VRegister& vd,
3406              const VRegister& vn,
3407              const VRegister& vm,
3408              int vm_index);
3409 
3410   // Signed saturating rounding doubling multiply subtract returning high half
3411   // [Armv8.1].
3412   void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3413 
3414   // Signed saturating doubling multiply element returning high half.
3415   void sqdmulh(const VRegister& vd,
3416                const VRegister& vn,
3417                const VRegister& vm,
3418                int vm_index);
3419 
3420   // Signed saturating rounding doubling multiply element returning high half.
3421   void sqrdmulh(const VRegister& vd,
3422                 const VRegister& vn,
3423                 const VRegister& vm,
3424                 int vm_index);
3425 
3426   // Signed dot product by element [Armv8.2].
3427   void sdot(const VRegister& vd,
3428             const VRegister& vn,
3429             const VRegister& vm,
3430             int vm_index);
3431 
3432   // Signed saturating rounding doubling multiply accumulate element returning
3433   // high half [Armv8.1].
3434   void sqrdmlah(const VRegister& vd,
3435                 const VRegister& vn,
3436                 const VRegister& vm,
3437                 int vm_index);
3438 
3439   // Unsigned dot product by element [Armv8.2].
3440   void udot(const VRegister& vd,
3441             const VRegister& vn,
3442             const VRegister& vm,
3443             int vm_index);
3444 
3445   // Signed saturating rounding doubling multiply subtract element returning
3446   // high half [Armv8.1].
3447   void sqrdmlsh(const VRegister& vd,
3448                 const VRegister& vn,
3449                 const VRegister& vm,
3450                 int vm_index);
3451 
  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3454 
3455   // Unsigned long multiply (second part).
3456   void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3457 
3458   // Add narrow returning high half.
3459   void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3460 
3461   // Add narrow returning high half (second part).
3462   void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3463 
3464   // Rounding add narrow returning high half.
3465   void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3466 
3467   // Rounding add narrow returning high half (second part).
3468   void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3469 
3470   // Subtract narrow returning high half.
3471   void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3472 
3473   // Subtract narrow returning high half (second part).
3474   void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3475 
3476   // Rounding subtract narrow returning high half.
3477   void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3478 
3479   // Rounding subtract narrow returning high half (second part).
3480   void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3481 
3482   // FP vector multiply accumulate.
3483   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3484 
3485   // FP fused multiply-add long to accumulator.
3486   void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3487 
3488   // FP fused multiply-add long to accumulator (second part).
3489   void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3490 
3491   // FP fused multiply-add long to accumulator by element.
3492   void fmlal(const VRegister& vd,
3493              const VRegister& vn,
3494              const VRegister& vm,
3495              int vm_index);
3496 
3497   // FP fused multiply-add long to accumulator by element (second part).
3498   void fmlal2(const VRegister& vd,
3499               const VRegister& vn,
3500               const VRegister& vm,
3501               int vm_index);
3502 
3503   // FP vector multiply subtract.
3504   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3505 
3506   // FP fused multiply-subtract long to accumulator.
3507   void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3508 
3509   // FP fused multiply-subtract long to accumulator (second part).
3510   void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3511 
3512   // FP fused multiply-subtract long to accumulator by element.
3513   void fmlsl(const VRegister& vd,
3514              const VRegister& vn,
3515              const VRegister& vm,
3516              int vm_index);
3517 
3518   // FP fused multiply-subtract long to accumulator by element (second part).
3519   void fmlsl2(const VRegister& vd,
3520               const VRegister& vn,
3521               const VRegister& vm,
3522               int vm_index);
3523 
3524   // FP vector multiply extended.
3525   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3526 
3527   // FP absolute greater than or equal.
3528   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3529 
3530   // FP absolute greater than.
3531   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3532 
3533   // FP multiply by element.
3534   void fmul(const VRegister& vd,
3535             const VRegister& vn,
3536             const VRegister& vm,
3537             int vm_index);
3538 
3539   // FP fused multiply-add to accumulator by element.
3540   void fmla(const VRegister& vd,
3541             const VRegister& vn,
3542             const VRegister& vm,
3543             int vm_index);
3544 
3545   // FP fused multiply-sub from accumulator by element.
3546   void fmls(const VRegister& vd,
3547             const VRegister& vn,
3548             const VRegister& vm,
3549             int vm_index);
3550 
3551   // FP multiply extended by element.
3552   void fmulx(const VRegister& vd,
3553              const VRegister& vn,
3554              const VRegister& vm,
3555              int vm_index);
3556 
3557   // FP compare equal.
3558   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3559 
3560   // FP greater than.
3561   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3562 
3563   // FP greater than or equal.
3564   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3565 
3566   // FP compare equal to zero.
3567   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
3568 
3569   // FP greater than zero.
3570   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
3571 
3572   // FP greater than or equal to zero.
3573   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
3574 
3575   // FP less than or equal to zero.
3576   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
3577 
  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
3580 
3581   // FP absolute difference.
3582   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3583 
3584   // FP pairwise add vector.
3585   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3586 
3587   // FP pairwise add scalar.
3588   void faddp(const VRegister& vd, const VRegister& vn);
3589 
3590   // FP pairwise maximum vector.
3591   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3592 
3593   // FP pairwise maximum scalar.
3594   void fmaxp(const VRegister& vd, const VRegister& vn);
3595 
3596   // FP pairwise minimum vector.
3597   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3598 
3599   // FP pairwise minimum scalar.
3600   void fminp(const VRegister& vd, const VRegister& vn);
3601 
3602   // FP pairwise maximum number vector.
3603   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3604 
3605   // FP pairwise maximum number scalar.
3606   void fmaxnmp(const VRegister& vd, const VRegister& vn);
3607 
3608   // FP pairwise minimum number vector.
3609   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3610 
3611   // FP pairwise minimum number scalar.
3612   void fminnmp(const VRegister& vd, const VRegister& vn);
3613 
  // Armv8.3 complex-number instructions - note that these are only
  // partial/helper operations, and must be used in sequence to perform full
  // complex-number arithmetic.
3616 
3617   // FP complex multiply accumulate (by element) [Armv8.3].
3618   void fcmla(const VRegister& vd,
3619              const VRegister& vn,
3620              const VRegister& vm,
3621              int vm_index,
3622              int rot);
3623 
3624   // FP complex multiply accumulate [Armv8.3].
3625   void fcmla(const VRegister& vd,
3626              const VRegister& vn,
3627              const VRegister& vm,
3628              int rot);
3629 
3630   // FP complex add [Armv8.3].
3631   void fcadd(const VRegister& vd,
3632              const VRegister& vn,
3633              const VRegister& vm,
3634              int rot);
3635 
3636   // Signed 8-bit integer matrix multiply-accumulate (vector).
3637   void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3638 
3639   // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector).
3640   void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3641 
3642   // Unsigned 8-bit integer matrix multiply-accumulate (vector).
3643   void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
3644 
3645   // Scalable Vector Extensions.
3646 
3647   // Absolute value (predicated).
3648   void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3649 
3650   // Add vectors (predicated).
3651   void add(const ZRegister& zd,
3652            const PRegisterM& pg,
3653            const ZRegister& zn,
3654            const ZRegister& zm);
3655 
3656   // Add vectors (unpredicated).
3657   void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3658 
3659   // Add immediate (unpredicated).
3660   void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
3661 
3662   // Add multiple of predicate register size to scalar register.
3663   void addpl(const Register& xd, const Register& xn, int imm6);
3664 
3665   // Add multiple of vector register size to scalar register.
3666   void addvl(const Register& xd, const Register& xn, int imm6);
3667 
3668   // Compute vector address.
3669   void adr(const ZRegister& zd, const SVEMemOperand& addr);
3670 
3671   // Bitwise AND predicates.
3672   void and_(const PRegisterWithLaneSize& pd,
3673             const PRegisterZ& pg,
3674             const PRegisterWithLaneSize& pn,
3675             const PRegisterWithLaneSize& pm);
3676 
3677   // Bitwise AND vectors (predicated).
3678   void and_(const ZRegister& zd,
3679             const PRegisterM& pg,
3680             const ZRegister& zn,
3681             const ZRegister& zm);
3682 
3683   // Bitwise AND with immediate (unpredicated).
3684   void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3685 
3686   // Bitwise AND vectors (unpredicated).
3687   void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3688 
3689   // Bitwise AND predicates.
3690   void ands(const PRegisterWithLaneSize& pd,
3691             const PRegisterZ& pg,
3692             const PRegisterWithLaneSize& pn,
3693             const PRegisterWithLaneSize& pm);
3694 
3695   // Bitwise AND reduction to scalar.
3696   void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
3697 
3698   // Arithmetic shift right by immediate (predicated).
3699   void asr(const ZRegister& zd,
3700            const PRegisterM& pg,
3701            const ZRegister& zn,
3702            int shift);
3703 
3704   // Arithmetic shift right by 64-bit wide elements (predicated).
3705   void asr(const ZRegister& zd,
3706            const PRegisterM& pg,
3707            const ZRegister& zn,
3708            const ZRegister& zm);
3709 
3710   // Arithmetic shift right by immediate (unpredicated).
3711   void asr(const ZRegister& zd, const ZRegister& zn, int shift);
3712 
3713   // Arithmetic shift right by 64-bit wide elements (unpredicated).
3714   void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3715 
3716   // Arithmetic shift right for divide by immediate (predicated).
3717   void asrd(const ZRegister& zd,
3718             const PRegisterM& pg,
3719             const ZRegister& zn,
3720             int shift);
3721 
3722   // Reversed arithmetic shift right by vector (predicated).
3723   void asrr(const ZRegister& zd,
3724             const PRegisterM& pg,
3725             const ZRegister& zn,
3726             const ZRegister& zm);
3727 
3728   // Bitwise clear predicates.
3729   void bic(const PRegisterWithLaneSize& pd,
3730            const PRegisterZ& pg,
3731            const PRegisterWithLaneSize& pn,
3732            const PRegisterWithLaneSize& pm);
3733 
3734   // Bitwise clear vectors (predicated).
3735   void bic(const ZRegister& zd,
3736            const PRegisterM& pg,
3737            const ZRegister& zn,
3738            const ZRegister& zm);
3739 
3740   // Bitwise clear bits using immediate (unpredicated).
3741   void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
3742 
3743   // Bitwise clear vectors (unpredicated).
3744   void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
3745 
3746   // Bitwise clear predicates.
3747   void bics(const PRegisterWithLaneSize& pd,
3748             const PRegisterZ& pg,
3749             const PRegisterWithLaneSize& pn,
3750             const PRegisterWithLaneSize& pm);
3751 
3752   // Break after first true condition.
3753   void brka(const PRegisterWithLaneSize& pd,
3754             const PRegister& pg,
3755             const PRegisterWithLaneSize& pn);
3756 
3757   // Break after first true condition.
3758   void brkas(const PRegisterWithLaneSize& pd,
3759              const PRegisterZ& pg,
3760              const PRegisterWithLaneSize& pn);
3761 
3762   // Break before first true condition.
3763   void brkb(const PRegisterWithLaneSize& pd,
3764             const PRegister& pg,
3765             const PRegisterWithLaneSize& pn);
3766 
3767   // Break before first true condition.
3768   void brkbs(const PRegisterWithLaneSize& pd,
3769              const PRegisterZ& pg,
3770              const PRegisterWithLaneSize& pn);
3771 
3772   // Propagate break to next partition.
3773   void brkn(const PRegisterWithLaneSize& pd,
3774             const PRegisterZ& pg,
3775             const PRegisterWithLaneSize& pn,
3776             const PRegisterWithLaneSize& pm);
3777 
3778   // Propagate break to next partition.
3779   void brkns(const PRegisterWithLaneSize& pd,
3780              const PRegisterZ& pg,
3781              const PRegisterWithLaneSize& pn,
3782              const PRegisterWithLaneSize& pm);
3783 
3784   // Break after first true condition, propagating from previous partition.
3785   void brkpa(const PRegisterWithLaneSize& pd,
3786              const PRegisterZ& pg,
3787              const PRegisterWithLaneSize& pn,
3788              const PRegisterWithLaneSize& pm);
3789 
3790   // Break after first true condition, propagating from previous partition.
3791   void brkpas(const PRegisterWithLaneSize& pd,
3792               const PRegisterZ& pg,
3793               const PRegisterWithLaneSize& pn,
3794               const PRegisterWithLaneSize& pm);
3795 
3796   // Break before first true condition, propagating from previous partition.
3797   void brkpb(const PRegisterWithLaneSize& pd,
3798              const PRegisterZ& pg,
3799              const PRegisterWithLaneSize& pn,
3800              const PRegisterWithLaneSize& pm);
3801 
3802   // Break before first true condition, propagating from previous partition.
3803   void brkpbs(const PRegisterWithLaneSize& pd,
3804               const PRegisterZ& pg,
3805               const PRegisterWithLaneSize& pn,
3806               const PRegisterWithLaneSize& pm);
3807 
3808   // Conditionally extract element after last to general-purpose register.
3809   void clasta(const Register& rd,
3810               const PRegister& pg,
3811               const Register& rn,
3812               const ZRegister& zm);
3813 
3814   // Conditionally extract element after last to SIMD&FP scalar register.
3815   void clasta(const VRegister& vd,
3816               const PRegister& pg,
3817               const VRegister& vn,
3818               const ZRegister& zm);
3819 
3820   // Conditionally extract element after last to vector register.
3821   void clasta(const ZRegister& zd,
3822               const PRegister& pg,
3823               const ZRegister& zn,
3824               const ZRegister& zm);
3825 
3826   // Conditionally extract last element to general-purpose register.
3827   void clastb(const Register& rd,
3828               const PRegister& pg,
3829               const Register& rn,
3830               const ZRegister& zm);
3831 
3832   // Conditionally extract last element to SIMD&FP scalar register.
3833   void clastb(const VRegister& vd,
3834               const PRegister& pg,
3835               const VRegister& vn,
3836               const ZRegister& zm);
3837 
3838   // Conditionally extract last element to vector register.
3839   void clastb(const ZRegister& zd,
3840               const PRegister& pg,
3841               const ZRegister& zn,
3842               const ZRegister& zm);
3843 
3844   // Count leading sign bits (predicated).
3845   void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3846 
3847   // Count leading zero bits (predicated).
3848   void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3849 
  // Compare vectors according to the given condition (predicated).
  void cmp(Condition cond,
           const PRegisterWithLaneSize& pd,
           const PRegisterZ& pg,
           const ZRegister& zn,
           const ZRegister& zm);
3855 
3856   // Compare vector to 64-bit wide elements.
3857   void cmpeq(const PRegisterWithLaneSize& pd,
3858              const PRegisterZ& pg,
3859              const ZRegister& zn,
3860              const ZRegister& zm);
3861 
3862   // Compare vector to immediate.
3863   void cmpeq(const PRegisterWithLaneSize& pd,
3864              const PRegisterZ& pg,
3865              const ZRegister& zn,
3866              int imm5);
3867 
3868   // Compare vector to 64-bit wide elements.
3869   void cmpge(const PRegisterWithLaneSize& pd,
3870              const PRegisterZ& pg,
3871              const ZRegister& zn,
3872              const ZRegister& zm);
3873 
3874   // Compare vector to immediate.
3875   void cmpge(const PRegisterWithLaneSize& pd,
3876              const PRegisterZ& pg,
3877              const ZRegister& zn,
3878              int imm5);
3879 
3880   // Compare vector to 64-bit wide elements.
3881   void cmpgt(const PRegisterWithLaneSize& pd,
3882              const PRegisterZ& pg,
3883              const ZRegister& zn,
3884              const ZRegister& zm);
3885 
3886   // Compare vector to immediate.
3887   void cmpgt(const PRegisterWithLaneSize& pd,
3888              const PRegisterZ& pg,
3889              const ZRegister& zn,
3890              int imm5);
3891 
3892   // Compare vector to 64-bit wide elements.
3893   void cmphi(const PRegisterWithLaneSize& pd,
3894              const PRegisterZ& pg,
3895              const ZRegister& zn,
3896              const ZRegister& zm);
3897 
3898   // Compare vector to immediate.
3899   void cmphi(const PRegisterWithLaneSize& pd,
3900              const PRegisterZ& pg,
3901              const ZRegister& zn,
3902              unsigned imm7);
3903 
3904   // Compare vector to 64-bit wide elements.
3905   void cmphs(const PRegisterWithLaneSize& pd,
3906              const PRegisterZ& pg,
3907              const ZRegister& zn,
3908              const ZRegister& zm);
3909 
3910   // Compare vector to immediate.
3911   void cmphs(const PRegisterWithLaneSize& pd,
3912              const PRegisterZ& pg,
3913              const ZRegister& zn,
3914              unsigned imm7);
3915 
3916   // Compare vector to 64-bit wide elements.
3917   void cmple(const PRegisterWithLaneSize& pd,
3918              const PRegisterZ& pg,
3919              const ZRegister& zn,
3920              const ZRegister& zm);
3921 
3922   // Compare vector to immediate.
3923   void cmple(const PRegisterWithLaneSize& pd,
3924              const PRegisterZ& pg,
3925              const ZRegister& zn,
3926              int imm5);
3927 
3928   // Compare vector to 64-bit wide elements.
3929   void cmplo(const PRegisterWithLaneSize& pd,
3930              const PRegisterZ& pg,
3931              const ZRegister& zn,
3932              const ZRegister& zm);
3933 
3934   // Compare vector to immediate.
3935   void cmplo(const PRegisterWithLaneSize& pd,
3936              const PRegisterZ& pg,
3937              const ZRegister& zn,
3938              unsigned imm7);
3939 
3940   // Compare vector to 64-bit wide elements.
3941   void cmpls(const PRegisterWithLaneSize& pd,
3942              const PRegisterZ& pg,
3943              const ZRegister& zn,
3944              const ZRegister& zm);
3945 
3946   // Compare vector to immediate.
3947   void cmpls(const PRegisterWithLaneSize& pd,
3948              const PRegisterZ& pg,
3949              const ZRegister& zn,
3950              unsigned imm7);
3951 
3952   // Compare vector to 64-bit wide elements.
3953   void cmplt(const PRegisterWithLaneSize& pd,
3954              const PRegisterZ& pg,
3955              const ZRegister& zn,
3956              const ZRegister& zm);
3957 
3958   // Compare vector to immediate.
3959   void cmplt(const PRegisterWithLaneSize& pd,
3960              const PRegisterZ& pg,
3961              const ZRegister& zn,
3962              int imm5);
3963 
3964   // Compare vector to 64-bit wide elements.
3965   void cmpne(const PRegisterWithLaneSize& pd,
3966              const PRegisterZ& pg,
3967              const ZRegister& zn,
3968              const ZRegister& zm);
3969 
3970   // Compare vector to immediate.
3971   void cmpne(const PRegisterWithLaneSize& pd,
3972              const PRegisterZ& pg,
3973              const ZRegister& zn,
3974              int imm5);
3975 
3976   // Logically invert boolean condition in vector (predicated).
3977   void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3978 
3979   // Count non-zero bits (predicated).
3980   void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
3981 
3982   // Set scalar to multiple of predicate constraint element count.
3983   void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3984 
3985   // Set scalar to multiple of predicate constraint element count.
3986   void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3987 
3988   // Set scalar to multiple of predicate constraint element count.
3989   void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3990 
3991   // Set scalar to active predicate element count.
3992   void cntp(const Register& xd,
3993             const PRegister& pg,
3994             const PRegisterWithLaneSize& pn);
3995 
3996   // Set scalar to multiple of predicate constraint element count.
3997   void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1);
3998 
3999   // Shuffle active elements of vector to the right and fill with zero.
4000   void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
4001 
4002   // Copy signed integer immediate to vector elements (predicated).
4003   void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
4004 
4005   // Copy general-purpose register to vector elements (predicated).
4006   void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
4007 
4008   // Copy SIMD&FP scalar register to vector elements (predicated).
4009   void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
4010 
4011   // Compare and terminate loop.
4012   void ctermeq(const Register& rn, const Register& rm);
4013 
4014   // Compare and terminate loop.
4015   void ctermne(const Register& rn, const Register& rm);
4016 
4017   // Decrement scalar by multiple of predicate constraint element count.
4018   void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4019 
4020   // Decrement scalar by multiple of predicate constraint element count.
4021   void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4022 
4023   // Decrement vector by multiple of predicate constraint element count.
4024   void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4025 
4026   // Decrement scalar by multiple of predicate constraint element count.
4027   void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4028 
4029   // Decrement vector by multiple of predicate constraint element count.
4030   void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4031 
4032   // Decrement scalar by active predicate element count.
4033   void decp(const Register& rdn, const PRegisterWithLaneSize& pg);
4034 
4035   // Decrement vector by active predicate element count.
4036   void decp(const ZRegister& zdn, const PRegister& pg);
4037 
4038   // Decrement scalar by multiple of predicate constraint element count.
4039   void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4040 
4041   // Decrement vector by multiple of predicate constraint element count.
4042   void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4043 
4044   // Broadcast general-purpose register to vector elements (unpredicated).
4045   void dup(const ZRegister& zd, const Register& xn);
4046 
4047   // Broadcast indexed element to vector (unpredicated).
4048   void dup(const ZRegister& zd, const ZRegister& zn, unsigned index);
4049 
4050   // As for movz/movk/movn, if the default shift of -1 is specified to dup, the
4051   // assembler will pick an appropriate immediate and left shift that is
4052   // equivalent to the immediate argument. If an explicit left shift is
4053   // specified (0 or 8), the immediate must be a signed 8-bit integer.
4054 
4055   // Broadcast signed immediate to vector elements (unpredicated).
4056   void dup(const ZRegister& zd, int imm8, int shift = -1);
4057 
4058   // Broadcast logical bitmask immediate to vector (unpredicated).
4059   void dupm(const ZRegister& zd, uint64_t imm);
4060 
4061   // Bitwise exclusive OR with inverted immediate (unpredicated).
4062   void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4063 
4064   // Bitwise exclusive OR predicates.
4065   void eor(const PRegisterWithLaneSize& pd,
4066            const PRegisterZ& pg,
4067            const PRegisterWithLaneSize& pn,
4068            const PRegisterWithLaneSize& pm);
4069 
4070   // Bitwise exclusive OR vectors (predicated).
4071   void eor(const ZRegister& zd,
4072            const PRegisterM& pg,
4073            const ZRegister& zn,
4074            const ZRegister& zm);
4075 
4076   // Bitwise exclusive OR with immediate (unpredicated).
4077   void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
4078 
4079   // Bitwise exclusive OR vectors (unpredicated).
4080   void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4081 
4082   // Bitwise exclusive OR predicates.
4083   void eors(const PRegisterWithLaneSize& pd,
4084             const PRegisterZ& pg,
4085             const PRegisterWithLaneSize& pn,
4086             const PRegisterWithLaneSize& pm);
4087 
4088   // Bitwise XOR reduction to scalar.
4089   void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4090 
4091   // Extract vector from pair of vectors.
4092   void ext(const ZRegister& zd,
4093            const ZRegister& zn,
4094            const ZRegister& zm,
4095            unsigned offset);
4096 
4097   // Floating-point absolute difference (predicated).
4098   void fabd(const ZRegister& zd,
4099             const PRegisterM& pg,
4100             const ZRegister& zn,
4101             const ZRegister& zm);
4102 
4103   // Floating-point absolute value (predicated).
4104   void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4105 
4106   // Floating-point absolute compare vectors.
4107   void facge(const PRegisterWithLaneSize& pd,
4108              const PRegisterZ& pg,
4109              const ZRegister& zn,
4110              const ZRegister& zm);
4111 
4112   // Floating-point absolute compare vectors.
4113   void facgt(const PRegisterWithLaneSize& pd,
4114              const PRegisterZ& pg,
4115              const ZRegister& zn,
4116              const ZRegister& zm);
4117 
4118   // Floating-point add immediate (predicated).
4119   void fadd(const ZRegister& zd,
4120             const PRegisterM& pg,
4121             const ZRegister& zn,
4122             double imm);
4123 
4124   // Floating-point add vector (predicated).
4125   void fadd(const ZRegister& zd,
4126             const PRegisterM& pg,
4127             const ZRegister& zn,
4128             const ZRegister& zm);
4129 
4130   // Floating-point add vector (unpredicated).
4131   void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4132 
4133   // Floating-point add strictly-ordered reduction, accumulating in scalar.
4134   void fadda(const VRegister& vd,
4135              const PRegister& pg,
4136              const VRegister& vn,
4137              const ZRegister& zm);
4138 
4139   // Floating-point add recursive reduction to scalar.
4140   void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4141 
4142   // Floating-point complex add with rotate (predicated).
4143   void fcadd(const ZRegister& zd,
4144              const PRegisterM& pg,
4145              const ZRegister& zn,
4146              const ZRegister& zm,
4147              int rot);
4148 
4149   // Floating-point compare vector with zero.
4150   void fcmeq(const PRegisterWithLaneSize& pd,
4151              const PRegisterZ& pg,
4152              const ZRegister& zn,
4153              double zero);
4154 
4155   // Floating-point compare vectors.
4156   void fcmeq(const PRegisterWithLaneSize& pd,
4157              const PRegisterZ& pg,
4158              const ZRegister& zn,
4159              const ZRegister& zm);
4160 
4161   // Floating-point compare vector with zero.
4162   void fcmge(const PRegisterWithLaneSize& pd,
4163              const PRegisterZ& pg,
4164              const ZRegister& zn,
4165              double zero);
4166 
4167   // Floating-point compare vectors.
4168   void fcmge(const PRegisterWithLaneSize& pd,
4169              const PRegisterZ& pg,
4170              const ZRegister& zn,
4171              const ZRegister& zm);
4172 
4173   // Floating-point compare vector with zero.
4174   void fcmgt(const PRegisterWithLaneSize& pd,
4175              const PRegisterZ& pg,
4176              const ZRegister& zn,
4177              double zero);
4178 
4179   // Floating-point compare vectors.
4180   void fcmgt(const PRegisterWithLaneSize& pd,
4181              const PRegisterZ& pg,
4182              const ZRegister& zn,
4183              const ZRegister& zm);
4184 
4185   // Floating-point complex multiply-add with rotate (predicated).
4186   void fcmla(const ZRegister& zda,
4187              const PRegisterM& pg,
4188              const ZRegister& zn,
4189              const ZRegister& zm,
4190              int rot);
4191 
4192   // Floating-point complex multiply-add by indexed values with rotate.
4193   void fcmla(const ZRegister& zda,
4194              const ZRegister& zn,
4195              const ZRegister& zm,
4196              int index,
4197              int rot);
4198 
4199   // Floating-point compare vector with zero.
4200   void fcmle(const PRegisterWithLaneSize& pd,
4201              const PRegisterZ& pg,
4202              const ZRegister& zn,
4203              double zero);
4204 
4205   // Floating-point compare vector with zero.
4206   void fcmlt(const PRegisterWithLaneSize& pd,
4207              const PRegisterZ& pg,
4208              const ZRegister& zn,
4209              double zero);
4210 
4211   // Floating-point compare vector with zero.
4212   void fcmne(const PRegisterWithLaneSize& pd,
4213              const PRegisterZ& pg,
4214              const ZRegister& zn,
4215              double zero);
4216 
4217   // Floating-point compare vectors.
4218   void fcmne(const PRegisterWithLaneSize& pd,
4219              const PRegisterZ& pg,
4220              const ZRegister& zn,
4221              const ZRegister& zm);
4222 
4223   // Floating-point compare vectors.
4224   void fcmuo(const PRegisterWithLaneSize& pd,
4225              const PRegisterZ& pg,
4226              const ZRegister& zn,
4227              const ZRegister& zm);
4228 
4229   // Copy floating-point immediate to vector elements (predicated).
4230   void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm);
4231 
  // Copy half-precision floating-point immediate to vector elements
  // (predicated).
  //
  // The Float16 immediate is widened to double — a value-preserving
  // conversion — and forwarded to the double-immediate overload of fcpy.
  // kIgnoreDefaultNaN means the conversion performs no default-NaN
  // substitution on NaN inputs.
  void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) {
    fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN));
  }
4237 
4238   // Floating-point convert precision (predicated).
4239   void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4240 
4241   // Floating-point convert to signed integer, rounding toward zero
4242   // (predicated).
4243   void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4244 
4245   // Floating-point convert to unsigned integer, rounding toward zero
4246   // (predicated).
4247   void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4248 
4249   // Floating-point divide by vector (predicated).
4250   void fdiv(const ZRegister& zd,
4251             const PRegisterM& pg,
4252             const ZRegister& zn,
4253             const ZRegister& zm);
4254 
4255   // Floating-point reversed divide by vector (predicated).
4256   void fdivr(const ZRegister& zd,
4257              const PRegisterM& pg,
4258              const ZRegister& zn,
4259              const ZRegister& zm);
4260 
4261   // Broadcast floating-point immediate to vector elements.
4262   void fdup(const ZRegister& zd, double imm);
4263 
  // Broadcast half-precision floating-point immediate to vector elements.
  //
  // The Float16 immediate is widened to double — a value-preserving
  // conversion — and forwarded to the double-immediate overload of fdup.
  // kIgnoreDefaultNaN means the conversion performs no default-NaN
  // substitution on NaN inputs.
  void fdup(const ZRegister& zd, Float16 imm) {
    fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
  }
4268 
4269   // Floating-point exponential accelerator.
4270   void fexpa(const ZRegister& zd, const ZRegister& zn);
4271 
4272   // Floating-point fused multiply-add vectors (predicated), writing
4273   // multiplicand [Zdn = Za + Zdn * Zm].
4274   void fmad(const ZRegister& zdn,
4275             const PRegisterM& pg,
4276             const ZRegister& zm,
4277             const ZRegister& za);
4278 
4279   // Floating-point maximum with immediate (predicated).
4280   void fmax(const ZRegister& zd,
4281             const PRegisterM& pg,
4282             const ZRegister& zn,
4283             double imm);
4284 
4285   // Floating-point maximum (predicated).
4286   void fmax(const ZRegister& zd,
4287             const PRegisterM& pg,
4288             const ZRegister& zn,
4289             const ZRegister& zm);
4290 
4291   // Floating-point maximum number with immediate (predicated).
4292   void fmaxnm(const ZRegister& zd,
4293               const PRegisterM& pg,
4294               const ZRegister& zn,
4295               double imm);
4296 
4297   // Floating-point maximum number (predicated).
4298   void fmaxnm(const ZRegister& zd,
4299               const PRegisterM& pg,
4300               const ZRegister& zn,
4301               const ZRegister& zm);
4302 
4303   // Floating-point maximum number recursive reduction to scalar.
4304   void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4305 
4306   // Floating-point maximum recursive reduction to scalar.
4307   void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4308 
4309   // Floating-point minimum with immediate (predicated).
4310   void fmin(const ZRegister& zd,
4311             const PRegisterM& pg,
4312             const ZRegister& zn,
4313             double imm);
4314 
4315   // Floating-point minimum (predicated).
4316   void fmin(const ZRegister& zd,
4317             const PRegisterM& pg,
4318             const ZRegister& zn,
4319             const ZRegister& zm);
4320 
4321   // Floating-point minimum number with immediate (predicated).
4322   void fminnm(const ZRegister& zd,
4323               const PRegisterM& pg,
4324               const ZRegister& zn,
4325               double imm);
4326 
4327   // Floating-point minimum number (predicated).
4328   void fminnm(const ZRegister& zd,
4329               const PRegisterM& pg,
4330               const ZRegister& zn,
4331               const ZRegister& zm);
4332 
4333   // Floating-point minimum number recursive reduction to scalar.
4334   void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4335 
4336   // Floating-point minimum recursive reduction to scalar.
4337   void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4338 
4339   // Floating-point fused multiply-add vectors (predicated), writing addend
4340   // [Zda = Zda + Zn * Zm].
4341   void fmla(const ZRegister& zda,
4342             const PRegisterM& pg,
4343             const ZRegister& zn,
4344             const ZRegister& zm);
4345 
4346   // Floating-point fused multiply-add by indexed elements
4347   // (Zda = Zda + Zn * Zm[indexed]).
4348   void fmla(const ZRegister& zda,
4349             const ZRegister& zn,
4350             const ZRegister& zm,
4351             int index);
4352 
4353   // Floating-point fused multiply-subtract vectors (predicated), writing
4354   // addend [Zda = Zda + -Zn * Zm].
4355   void fmls(const ZRegister& zda,
4356             const PRegisterM& pg,
4357             const ZRegister& zn,
4358             const ZRegister& zm);
4359 
4360   // Floating-point fused multiply-subtract by indexed elements
4361   // (Zda = Zda + -Zn * Zm[indexed]).
4362   void fmls(const ZRegister& zda,
4363             const ZRegister& zn,
4364             const ZRegister& zm,
4365             int index);
4366 
4367   // Move 8-bit floating-point immediate to vector elements (unpredicated).
4368   void fmov(const ZRegister& zd, double imm);
4369 
4370   // Move 8-bit floating-point immediate to vector elements (predicated).
4371   void fmov(const ZRegister& zd, const PRegisterM& pg, double imm);
4372 
4373   // Floating-point fused multiply-subtract vectors (predicated), writing
4374   // multiplicand [Zdn = Za + -Zdn * Zm].
4375   void fmsb(const ZRegister& zdn,
4376             const PRegisterM& pg,
4377             const ZRegister& zm,
4378             const ZRegister& za);
4379 
4380   // Floating-point multiply by immediate (predicated).
4381   void fmul(const ZRegister& zd,
4382             const PRegisterM& pg,
4383             const ZRegister& zn,
4384             double imm);
4385 
4386   // Floating-point multiply vectors (predicated).
4387   void fmul(const ZRegister& zd,
4388             const PRegisterM& pg,
4389             const ZRegister& zn,
4390             const ZRegister& zm);
4391 
4392   // Floating-point multiply by indexed elements.
4393   void fmul(const ZRegister& zd,
4394             const ZRegister& zn,
4395             const ZRegister& zm,
4396             unsigned index);
4397 
4398   // Floating-point multiply vectors (unpredicated).
4399   void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4400 
4401   // Floating-point multiply-extended vectors (predicated).
4402   void fmulx(const ZRegister& zd,
4403              const PRegisterM& pg,
4404              const ZRegister& zn,
4405              const ZRegister& zm);
4406 
4407   // Floating-point negate (predicated).
4408   void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4409 
4410   // Floating-point negated fused multiply-add vectors (predicated), writing
4411   // multiplicand [Zdn = -Za + -Zdn * Zm].
4412   void fnmad(const ZRegister& zdn,
4413              const PRegisterM& pg,
4414              const ZRegister& zm,
4415              const ZRegister& za);
4416 
4417   // Floating-point negated fused multiply-add vectors (predicated), writing
4418   // addend [Zda = -Zda + -Zn * Zm].
4419   void fnmla(const ZRegister& zda,
4420              const PRegisterM& pg,
4421              const ZRegister& zn,
4422              const ZRegister& zm);
4423 
4424   // Floating-point negated fused multiply-subtract vectors (predicated),
4425   // writing addend [Zda = -Zda + Zn * Zm].
4426   void fnmls(const ZRegister& zda,
4427              const PRegisterM& pg,
4428              const ZRegister& zn,
4429              const ZRegister& zm);
4430 
4431   // Floating-point negated fused multiply-subtract vectors (predicated),
4432   // writing multiplicand [Zdn = -Za + Zdn * Zm].
4433   void fnmsb(const ZRegister& zdn,
4434              const PRegisterM& pg,
4435              const ZRegister& zm,
4436              const ZRegister& za);
4437 
4438   // Floating-point reciprocal estimate (unpredicated).
4439   void frecpe(const ZRegister& zd, const ZRegister& zn);
4440 
4441   // Floating-point reciprocal step (unpredicated).
4442   void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4443 
4444   // Floating-point reciprocal exponent (predicated).
4445   void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4446 
4447   // Floating-point round to integral value (predicated).
4448   void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4449 
4450   // Floating-point round to integral value (predicated).
4451   void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4452 
4453   // Floating-point round to integral value (predicated).
4454   void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4455 
4456   // Floating-point round to integral value (predicated).
4457   void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4458 
4459   // Floating-point round to integral value (predicated).
4460   void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4461 
4462   // Floating-point round to integral value (predicated).
4463   void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4464 
4465   // Floating-point round to integral value (predicated).
4466   void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4467 
4468   // Floating-point reciprocal square root estimate (unpredicated).
4469   void frsqrte(const ZRegister& zd, const ZRegister& zn);
4470 
4471   // Floating-point reciprocal square root step (unpredicated).
4472   void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4473 
4474   // Floating-point adjust exponent by vector (predicated).
4475   void fscale(const ZRegister& zd,
4476               const PRegisterM& pg,
4477               const ZRegister& zn,
4478               const ZRegister& zm);
4479 
4480   // Floating-point square root (predicated).
4481   void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
4482 
4483   // Floating-point subtract immediate (predicated).
4484   void fsub(const ZRegister& zd,
4485             const PRegisterM& pg,
4486             const ZRegister& zn,
4487             double imm);
4488 
4489   // Floating-point subtract vectors (predicated).
4490   void fsub(const ZRegister& zd,
4491             const PRegisterM& pg,
4492             const ZRegister& zn,
4493             const ZRegister& zm);
4494 
4495   // Floating-point subtract vectors (unpredicated).
4496   void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4497 
4498   // Floating-point reversed subtract from immediate (predicated).
4499   void fsubr(const ZRegister& zd,
4500              const PRegisterM& pg,
4501              const ZRegister& zn,
4502              double imm);
4503 
4504   // Floating-point reversed subtract vectors (predicated).
4505   void fsubr(const ZRegister& zd,
4506              const PRegisterM& pg,
4507              const ZRegister& zn,
4508              const ZRegister& zm);
4509 
4510   // Floating-point trigonometric multiply-add coefficient.
4511   void ftmad(const ZRegister& zd,
4512              const ZRegister& zn,
4513              const ZRegister& zm,
4514              int imm3);
4515 
4516   // Floating-point trigonometric starting value.
4517   void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4518 
4519   // Floating-point trigonometric select coefficient.
4520   void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4521 
4522   // Increment scalar by multiple of predicate constraint element count.
4523   void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4524 
4525   // Increment scalar by multiple of predicate constraint element count.
4526   void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4527 
4528   // Increment vector by multiple of predicate constraint element count.
4529   void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4530 
4531   // Increment scalar by multiple of predicate constraint element count.
4532   void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4533 
4534   // Increment vector by multiple of predicate constraint element count.
4535   void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4536 
4537   // Increment scalar by active predicate element count.
4538   void incp(const Register& rdn, const PRegisterWithLaneSize& pg);
4539 
4540   // Increment vector by active predicate element count.
4541   void incp(const ZRegister& zdn, const PRegister& pg);
4542 
4543   // Increment scalar by multiple of predicate constraint element count.
4544   void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1);
4545 
4546   // Increment vector by multiple of predicate constraint element count.
4547   void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
4548 
4549   // Create index starting from and incremented by immediate.
4550   void index(const ZRegister& zd, int start, int step);
4551 
4552   // Create index starting from and incremented by general-purpose register.
4553   void index(const ZRegister& zd, const Register& rn, const Register& rm);
4554 
4555   // Create index starting from general-purpose register and incremented by
4556   // immediate.
4557   void index(const ZRegister& zd, const Register& rn, int imm5);
4558 
4559   // Create index starting from immediate and incremented by general-purpose
4560   // register.
4561   void index(const ZRegister& zd, int imm5, const Register& rm);
4562 
4563   // Insert general-purpose register in shifted vector.
4564   void insr(const ZRegister& zdn, const Register& rm);
4565 
4566   // Insert SIMD&FP scalar register in shifted vector.
4567   void insr(const ZRegister& zdn, const VRegister& vm);
4568 
4569   // Extract element after last to general-purpose register.
4570   void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn);
4571 
4572   // Extract element after last to SIMD&FP scalar register.
4573   void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4574 
4575   // Extract last element to general-purpose register.
4576   void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn);
4577 
4578   // Extract last element to SIMD&FP scalar register.
4579   void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
4580 
4581   // Contiguous/gather load bytes to vector.
4582   void ld1b(const ZRegister& zt,
4583             const PRegisterZ& pg,
4584             const SVEMemOperand& addr);
4585 
4586   // Contiguous/gather load halfwords to vector.
4587   void ld1h(const ZRegister& zt,
4588             const PRegisterZ& pg,
4589             const SVEMemOperand& addr);
4590 
4591   // Contiguous/gather load words to vector.
4592   void ld1w(const ZRegister& zt,
4593             const PRegisterZ& pg,
4594             const SVEMemOperand& addr);
4595 
4596   // Contiguous/gather load doublewords to vector.
4597   void ld1d(const ZRegister& zt,
4598             const PRegisterZ& pg,
4599             const SVEMemOperand& addr);
4600 
4601   // TODO: Merge other loads into the SVEMemOperand versions.
4602 
4603   // Load and broadcast unsigned byte to vector.
4604   void ld1rb(const ZRegister& zt,
4605              const PRegisterZ& pg,
4606              const SVEMemOperand& addr);
4607 
4608   // Load and broadcast unsigned halfword to vector.
4609   void ld1rh(const ZRegister& zt,
4610              const PRegisterZ& pg,
4611              const SVEMemOperand& addr);
4612 
4613   // Load and broadcast unsigned word to vector.
4614   void ld1rw(const ZRegister& zt,
4615              const PRegisterZ& pg,
4616              const SVEMemOperand& addr);
4617 
4618   // Load and broadcast doubleword to vector.
4619   void ld1rd(const ZRegister& zt,
4620              const PRegisterZ& pg,
4621              const SVEMemOperand& addr);
4622 
4623   // Contiguous load and replicate sixteen bytes.
4624   void ld1rqb(const ZRegister& zt,
4625               const PRegisterZ& pg,
4626               const SVEMemOperand& addr);
4627 
4628   // Contiguous load and replicate eight halfwords.
4629   void ld1rqh(const ZRegister& zt,
4630               const PRegisterZ& pg,
4631               const SVEMemOperand& addr);
4632 
4633   // Contiguous load and replicate four words.
4634   void ld1rqw(const ZRegister& zt,
4635               const PRegisterZ& pg,
4636               const SVEMemOperand& addr);
4637 
4638   // Contiguous load and replicate two doublewords.
4639   void ld1rqd(const ZRegister& zt,
4640               const PRegisterZ& pg,
4641               const SVEMemOperand& addr);
4642 
4643   // Contiguous load and replicate thirty-two bytes.
4644   void ld1rob(const ZRegister& zt,
4645               const PRegisterZ& pg,
4646               const SVEMemOperand& addr);
4647 
4648   // Contiguous load and replicate sixteen halfwords.
4649   void ld1roh(const ZRegister& zt,
4650               const PRegisterZ& pg,
4651               const SVEMemOperand& addr);
4652 
4653   // Contiguous load and replicate eight words.
4654   void ld1row(const ZRegister& zt,
4655               const PRegisterZ& pg,
4656               const SVEMemOperand& addr);
4657 
4658   // Contiguous load and replicate four doublewords.
4659   void ld1rod(const ZRegister& zt,
4660               const PRegisterZ& pg,
4661               const SVEMemOperand& addr);
4662 
4663   // Load and broadcast signed byte to vector.
4664   void ld1rsb(const ZRegister& zt,
4665               const PRegisterZ& pg,
4666               const SVEMemOperand& addr);
4667 
4668   // Load and broadcast signed halfword to vector.
4669   void ld1rsh(const ZRegister& zt,
4670               const PRegisterZ& pg,
4671               const SVEMemOperand& addr);
4672 
4673   // Load and broadcast signed word to vector.
4674   void ld1rsw(const ZRegister& zt,
4675               const PRegisterZ& pg,
4676               const SVEMemOperand& addr);
4677 
4678   // Contiguous/gather load signed bytes to vector.
4679   void ld1sb(const ZRegister& zt,
4680              const PRegisterZ& pg,
4681              const SVEMemOperand& addr);
4682 
4683   // Contiguous/gather load signed halfwords to vector.
4684   void ld1sh(const ZRegister& zt,
4685              const PRegisterZ& pg,
4686              const SVEMemOperand& addr);
4687 
4688   // Contiguous/gather load signed words to vector.
4689   void ld1sw(const ZRegister& zt,
4690              const PRegisterZ& pg,
4691              const SVEMemOperand& addr);
4692 
4693   // TODO: Merge other loads into the SVEMemOperand versions.
4694 
4695   // Contiguous load two-byte structures to two vectors.
4696   void ld2b(const ZRegister& zt1,
4697             const ZRegister& zt2,
4698             const PRegisterZ& pg,
4699             const SVEMemOperand& addr);
4700 
4701   // Contiguous load two-halfword structures to two vectors.
4702   void ld2h(const ZRegister& zt1,
4703             const ZRegister& zt2,
4704             const PRegisterZ& pg,
4705             const SVEMemOperand& addr);
4706 
4707   // Contiguous load two-word structures to two vectors.
4708   void ld2w(const ZRegister& zt1,
4709             const ZRegister& zt2,
4710             const PRegisterZ& pg,
4711             const SVEMemOperand& addr);
4712 
4713   // Contiguous load two-doubleword structures to two vectors.
4714   void ld2d(const ZRegister& zt1,
4715             const ZRegister& zt2,
4716             const PRegisterZ& pg,
4717             const SVEMemOperand& addr);
4718 
4719   // Contiguous load three-byte structures to three vectors.
4720   void ld3b(const ZRegister& zt1,
4721             const ZRegister& zt2,
4722             const ZRegister& zt3,
4723             const PRegisterZ& pg,
4724             const SVEMemOperand& addr);
4725 
4726   // Contiguous load three-halfword structures to three vectors.
4727   void ld3h(const ZRegister& zt1,
4728             const ZRegister& zt2,
4729             const ZRegister& zt3,
4730             const PRegisterZ& pg,
4731             const SVEMemOperand& addr);
4732 
4733   // Contiguous load three-word structures to three vectors.
4734   void ld3w(const ZRegister& zt1,
4735             const ZRegister& zt2,
4736             const ZRegister& zt3,
4737             const PRegisterZ& pg,
4738             const SVEMemOperand& addr);
4739 
4740   // Contiguous load three-doubleword structures to three vectors.
4741   void ld3d(const ZRegister& zt1,
4742             const ZRegister& zt2,
4743             const ZRegister& zt3,
4744             const PRegisterZ& pg,
4745             const SVEMemOperand& addr);
4746 
4747   // Contiguous load four-byte structures to four vectors.
4748   void ld4b(const ZRegister& zt1,
4749             const ZRegister& zt2,
4750             const ZRegister& zt3,
4751             const ZRegister& zt4,
4752             const PRegisterZ& pg,
4753             const SVEMemOperand& addr);
4754 
4755   // Contiguous load four-halfword structures to four vectors.
4756   void ld4h(const ZRegister& zt1,
4757             const ZRegister& zt2,
4758             const ZRegister& zt3,
4759             const ZRegister& zt4,
4760             const PRegisterZ& pg,
4761             const SVEMemOperand& addr);
4762 
4763   // Contiguous load four-word structures to four vectors.
4764   void ld4w(const ZRegister& zt1,
4765             const ZRegister& zt2,
4766             const ZRegister& zt3,
4767             const ZRegister& zt4,
4768             const PRegisterZ& pg,
4769             const SVEMemOperand& addr);
4770 
4771   // Contiguous load four-doubleword structures to four vectors.
4772   void ld4d(const ZRegister& zt1,
4773             const ZRegister& zt2,
4774             const ZRegister& zt3,
4775             const ZRegister& zt4,
4776             const PRegisterZ& pg,
4777             const SVEMemOperand& addr);
4778 
4779   // Contiguous load first-fault unsigned bytes to vector.
4780   void ldff1b(const ZRegister& zt,
4781               const PRegisterZ& pg,
4782               const SVEMemOperand& addr);
4783 
4784   // Contiguous load first-fault unsigned halfwords to vector.
4785   void ldff1h(const ZRegister& zt,
4786               const PRegisterZ& pg,
4787               const SVEMemOperand& addr);
4788 
4789   // Contiguous load first-fault unsigned words to vector.
4790   void ldff1w(const ZRegister& zt,
4791               const PRegisterZ& pg,
4792               const SVEMemOperand& addr);
4793 
4794   // Contiguous load first-fault doublewords to vector.
4795   void ldff1d(const ZRegister& zt,
4796               const PRegisterZ& pg,
4797               const SVEMemOperand& addr);
4798 
4799   // Contiguous load first-fault signed bytes to vector.
4800   void ldff1sb(const ZRegister& zt,
4801                const PRegisterZ& pg,
4802                const SVEMemOperand& addr);
4803 
4804   // Contiguous load first-fault signed halfwords to vector.
4805   void ldff1sh(const ZRegister& zt,
4806                const PRegisterZ& pg,
4807                const SVEMemOperand& addr);
4808 
4809   // Contiguous load first-fault signed words to vector.
4810   void ldff1sw(const ZRegister& zt,
4811                const PRegisterZ& pg,
4812                const SVEMemOperand& addr);
4813 
4814   // Gather load first-fault unsigned bytes to vector.
4815   void ldff1b(const ZRegister& zt,
4816               const PRegisterZ& pg,
4817               const Register& xn,
4818               const ZRegister& zm);
4819 
4820   // Gather load first-fault unsigned bytes to vector (immediate index).
4821   void ldff1b(const ZRegister& zt,
4822               const PRegisterZ& pg,
4823               const ZRegister& zn,
4824               int imm5);
4825 
4826   // Gather load first-fault doublewords to vector (vector index).
4827   void ldff1d(const ZRegister& zt,
4828               const PRegisterZ& pg,
4829               const Register& xn,
4830               const ZRegister& zm);
4831 
4832   // Gather load first-fault doublewords to vector (immediate index).
4833   void ldff1d(const ZRegister& zt,
4834               const PRegisterZ& pg,
4835               const ZRegister& zn,
4836               int imm5);
4837 
4838   // Gather load first-fault unsigned halfwords to vector (vector index).
4839   void ldff1h(const ZRegister& zt,
4840               const PRegisterZ& pg,
4841               const Register& xn,
4842               const ZRegister& zm);
4843 
4844   // Gather load first-fault unsigned halfwords to vector (immediate index).
4845   void ldff1h(const ZRegister& zt,
4846               const PRegisterZ& pg,
4847               const ZRegister& zn,
4848               int imm5);
4849 
4850   // Gather load first-fault signed bytes to vector (vector index).
4851   void ldff1sb(const ZRegister& zt,
4852                const PRegisterZ& pg,
4853                const Register& xn,
4854                const ZRegister& zm);
4855 
4856   // Gather load first-fault signed bytes to vector (immediate index).
4857   void ldff1sb(const ZRegister& zt,
4858                const PRegisterZ& pg,
4859                const ZRegister& zn,
4860                int imm5);
4861 
4862   // Gather load first-fault signed halfwords to vector (vector index).
4863   void ldff1sh(const ZRegister& zt,
4864                const PRegisterZ& pg,
4865                const Register& xn,
4866                const ZRegister& zm);
4867 
4868   // Gather load first-fault signed halfwords to vector (immediate index).
4869   void ldff1sh(const ZRegister& zt,
4870                const PRegisterZ& pg,
4871                const ZRegister& zn,
4872                int imm5);
4873 
4874   // Gather load first-fault signed words to vector (vector index).
4875   void ldff1sw(const ZRegister& zt,
4876                const PRegisterZ& pg,
4877                const Register& xn,
4878                const ZRegister& zm);
4879 
4880   // Gather load first-fault signed words to vector (immediate index).
4881   void ldff1sw(const ZRegister& zt,
4882                const PRegisterZ& pg,
4883                const ZRegister& zn,
4884                int imm5);
4885 
4886   // Gather load first-fault unsigned words to vector (vector index).
4887   void ldff1w(const ZRegister& zt,
4888               const PRegisterZ& pg,
4889               const Register& xn,
4890               const ZRegister& zm);
4891 
4892   // Gather load first-fault unsigned words to vector (immediate index).
4893   void ldff1w(const ZRegister& zt,
4894               const PRegisterZ& pg,
4895               const ZRegister& zn,
4896               int imm5);
4897 
4898   // Contiguous load non-fault unsigned bytes to vector (immediate index).
4899   void ldnf1b(const ZRegister& zt,
4900               const PRegisterZ& pg,
4901               const SVEMemOperand& addr);
4902 
4903   // Contiguous load non-fault doublewords to vector (immediate index).
4904   void ldnf1d(const ZRegister& zt,
4905               const PRegisterZ& pg,
4906               const SVEMemOperand& addr);
4907 
4908   // Contiguous load non-fault unsigned halfwords to vector (immediate
4909   // index).
4910   void ldnf1h(const ZRegister& zt,
4911               const PRegisterZ& pg,
4912               const SVEMemOperand& addr);
4913 
4914   // Contiguous load non-fault signed bytes to vector (immediate index).
4915   void ldnf1sb(const ZRegister& zt,
4916                const PRegisterZ& pg,
4917                const SVEMemOperand& addr);
4918 
4919   // Contiguous load non-fault signed halfwords to vector (immediate index).
4920   void ldnf1sh(const ZRegister& zt,
4921                const PRegisterZ& pg,
4922                const SVEMemOperand& addr);
4923 
4924   // Contiguous load non-fault signed words to vector (immediate index).
4925   void ldnf1sw(const ZRegister& zt,
4926                const PRegisterZ& pg,
4927                const SVEMemOperand& addr);
4928 
4929   // Contiguous load non-fault unsigned words to vector (immediate index).
4930   void ldnf1w(const ZRegister& zt,
4931               const PRegisterZ& pg,
4932               const SVEMemOperand& addr);
4933 
4934   // Contiguous load non-temporal bytes to vector.
4935   void ldnt1b(const ZRegister& zt,
4936               const PRegisterZ& pg,
4937               const SVEMemOperand& addr);
4938 
4939   // Contiguous load non-temporal halfwords to vector.
4940   void ldnt1h(const ZRegister& zt,
4941               const PRegisterZ& pg,
4942               const SVEMemOperand& addr);
4943 
4944   // Contiguous load non-temporal words to vector.
4945   void ldnt1w(const ZRegister& zt,
4946               const PRegisterZ& pg,
4947               const SVEMemOperand& addr);
4948 
4949   // Contiguous load non-temporal doublewords to vector.
4950   void ldnt1d(const ZRegister& zt,
4951               const PRegisterZ& pg,
4952               const SVEMemOperand& addr);
4953 
4954   // Load SVE predicate/vector register.
4955   void ldr(const CPURegister& rt, const SVEMemOperand& addr);
4956 
4957   // Logical shift left by immediate (predicated).
4958   void lsl(const ZRegister& zd,
4959            const PRegisterM& pg,
4960            const ZRegister& zn,
4961            int shift);
4962 
4963   // Logical shift left by 64-bit wide elements (predicated).
4964   void lsl(const ZRegister& zd,
4965            const PRegisterM& pg,
4966            const ZRegister& zn,
4967            const ZRegister& zm);
4968 
4969   // Logical shift left by immediate (unpredicated).
4970   void lsl(const ZRegister& zd, const ZRegister& zn, int shift);
4971 
4972   // Logical shift left by 64-bit wide elements (unpredicated).
4973   void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4974 
4975   // Reversed logical shift left by vector (predicated).
4976   void lslr(const ZRegister& zd,
4977             const PRegisterM& pg,
4978             const ZRegister& zn,
4979             const ZRegister& zm);
4980 
4981   // Logical shift right by immediate (predicated).
4982   void lsr(const ZRegister& zd,
4983            const PRegisterM& pg,
4984            const ZRegister& zn,
4985            int shift);
4986 
4987   // Logical shift right by 64-bit wide elements (predicated).
4988   void lsr(const ZRegister& zd,
4989            const PRegisterM& pg,
4990            const ZRegister& zn,
4991            const ZRegister& zm);
4992 
4993   // Logical shift right by immediate (unpredicated).
4994   void lsr(const ZRegister& zd, const ZRegister& zn, int shift);
4995 
4996   // Logical shift right by 64-bit wide elements (unpredicated).
4997   void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
4998 
4999   // Reversed logical shift right by vector (predicated).
5000   void lsrr(const ZRegister& zd,
5001             const PRegisterM& pg,
5002             const ZRegister& zn,
5003             const ZRegister& zm);
5004 
5005   // Bitwise invert predicate.
5006   void not_(const PRegisterWithLaneSize& pd,
5007             const PRegisterZ& pg,
5008             const PRegisterWithLaneSize& pn);
5009 
5010   // Bitwise invert predicate, setting the condition flags.
5011   void nots(const PRegisterWithLaneSize& pd,
5012             const PRegisterZ& pg,
5013             const PRegisterWithLaneSize& pn);
5014 
5015   // Multiply-add vectors (predicated), writing multiplicand
5016   // [Zdn = Za + Zdn * Zm].
5017   void mad(const ZRegister& zdn,
5018            const PRegisterM& pg,
5019            const ZRegister& zm,
5020            const ZRegister& za);
5021 
5022   // Multiply-add vectors (predicated), writing addend
5023   // [Zda = Zda + Zn * Zm].
5024   void mla(const ZRegister& zda,
5025            const PRegisterM& pg,
5026            const ZRegister& zn,
5027            const ZRegister& zm);
5028 
5029   // Multiply-subtract vectors (predicated), writing addend
5030   // [Zda = Zda - Zn * Zm].
5031   void mls(const ZRegister& zda,
5032            const PRegisterM& pg,
5033            const ZRegister& zn,
5034            const ZRegister& zm);
5035 
5036   // Move predicates (unpredicated)
5037   void mov(const PRegister& pd, const PRegister& pn);
5038 
5039   // Move predicates (merging)
5040   void mov(const PRegisterWithLaneSize& pd,
5041            const PRegisterM& pg,
5042            const PRegisterWithLaneSize& pn);
5043 
5044   // Move predicates (zeroing)
5045   void mov(const PRegisterWithLaneSize& pd,
5046            const PRegisterZ& pg,
5047            const PRegisterWithLaneSize& pn);
5048 
5049   // Move general-purpose register to vector elements (unpredicated)
5050   void mov(const ZRegister& zd, const Register& xn);
5051 
5052   // Move SIMD&FP scalar register to vector elements (unpredicated)
5053   void mov(const ZRegister& zd, const VRegister& vn);
5054 
5055   // Move vector register (unpredicated)
5056   void mov(const ZRegister& zd, const ZRegister& zn);
5057 
5058   // Move indexed element to vector elements (unpredicated)
5059   void mov(const ZRegister& zd, const ZRegister& zn, unsigned index);
5060 
5061   // Move general-purpose register to vector elements (predicated)
5062   void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn);
5063 
5064   // Move SIMD&FP scalar register to vector elements (predicated)
5065   void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn);
5066 
5067   // Move vector elements (predicated)
5068   void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5069 
5070   // Move signed integer immediate to vector elements (predicated)
5071   void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1);
5072 
5073   // Move signed immediate to vector elements (unpredicated).
5074   void mov(const ZRegister& zd, int imm8, int shift);
5075 
5076   // Move logical bitmask immediate to vector (unpredicated).
5077   void mov(const ZRegister& zd, uint64_t imm);
5078 
5079   // Move predicate (unpredicated), setting the condition flags
5080   void movs(const PRegister& pd, const PRegister& pn);
5081 
5082   // Move predicates (zeroing), setting the condition flags
5083   void movs(const PRegisterWithLaneSize& pd,
5084             const PRegisterZ& pg,
5085             const PRegisterWithLaneSize& pn);
5086 
5087   // Move prefix (predicated).
5088   void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn);
5089 
5090   // Move prefix (unpredicated).
5091   void movprfx(const ZRegister& zd, const ZRegister& zn);
5092 
5093   // Multiply-subtract vectors (predicated), writing multiplicand
5094   // [Zdn = Za - Zdn * Zm].
5095   void msb(const ZRegister& zdn,
5096            const PRegisterM& pg,
5097            const ZRegister& zm,
5098            const ZRegister& za);
5099 
5100   // Multiply vectors (predicated).
5101   void mul(const ZRegister& zd,
5102            const PRegisterM& pg,
5103            const ZRegister& zn,
5104            const ZRegister& zm);
5105 
5106   // Multiply by immediate (unpredicated).
5107   void mul(const ZRegister& zd, const ZRegister& zn, int imm8);
5108 
5109   // Bitwise NAND predicates.
5110   void nand(const PRegisterWithLaneSize& pd,
5111             const PRegisterZ& pg,
5112             const PRegisterWithLaneSize& pn,
5113             const PRegisterWithLaneSize& pm);
5114 
  // Bitwise NAND predicates, setting the condition flags.
5116   void nands(const PRegisterWithLaneSize& pd,
5117              const PRegisterZ& pg,
5118              const PRegisterWithLaneSize& pn,
5119              const PRegisterWithLaneSize& pm);
5120 
5121   // Negate (predicated).
5122   void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5123 
5124   // Bitwise NOR predicates.
5125   void nor(const PRegisterWithLaneSize& pd,
5126            const PRegisterZ& pg,
5127            const PRegisterWithLaneSize& pn,
5128            const PRegisterWithLaneSize& pm);
5129 
  // Bitwise NOR predicates, setting the condition flags.
5131   void nors(const PRegisterWithLaneSize& pd,
5132             const PRegisterZ& pg,
5133             const PRegisterWithLaneSize& pn,
5134             const PRegisterWithLaneSize& pm);
5135 
5136   // Bitwise invert vector (predicated).
5137   void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5138 
5139   // Bitwise OR inverted predicate.
5140   void orn(const PRegisterWithLaneSize& pd,
5141            const PRegisterZ& pg,
5142            const PRegisterWithLaneSize& pn,
5143            const PRegisterWithLaneSize& pm);
5144 
  // Bitwise OR inverted predicate, setting the condition flags.
5146   void orns(const PRegisterWithLaneSize& pd,
5147             const PRegisterZ& pg,
5148             const PRegisterWithLaneSize& pn,
5149             const PRegisterWithLaneSize& pm);
5150 
5151   // Bitwise OR with inverted immediate (unpredicated).
5152   void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5153 
5154   // Bitwise OR predicate.
5155   void orr(const PRegisterWithLaneSize& pd,
5156            const PRegisterZ& pg,
5157            const PRegisterWithLaneSize& pn,
5158            const PRegisterWithLaneSize& pm);
5159 
5160   // Bitwise OR vectors (predicated).
5161   void orr(const ZRegister& zd,
5162            const PRegisterM& pg,
5163            const ZRegister& zn,
5164            const ZRegister& zm);
5165 
5166   // Bitwise OR with immediate (unpredicated).
5167   void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm);
5168 
5169   // Bitwise OR vectors (unpredicated).
5170   void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5171 
  // Bitwise OR predicate, setting the condition flags.
5173   void orrs(const PRegisterWithLaneSize& pd,
5174             const PRegisterZ& pg,
5175             const PRegisterWithLaneSize& pn,
5176             const PRegisterWithLaneSize& pm);
5177 
5178   // Bitwise OR reduction to scalar.
5179   void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5180 
5181   // Set all predicate elements to false.
5182   void pfalse(const PRegisterWithLaneSize& pd);
5183 
5184   // Set the first active predicate element to true.
5185   void pfirst(const PRegisterWithLaneSize& pd,
5186               const PRegister& pg,
5187               const PRegisterWithLaneSize& pn);
5188 
5189   // Find next active predicate.
5190   void pnext(const PRegisterWithLaneSize& pd,
5191              const PRegister& pg,
5192              const PRegisterWithLaneSize& pn);
5193 
5194   // Prefetch bytes.
5195   void prfb(PrefetchOperation prfop,
5196             const PRegister& pg,
5197             const SVEMemOperand& addr);
5198 
5199   // Prefetch halfwords.
5200   void prfh(PrefetchOperation prfop,
5201             const PRegister& pg,
5202             const SVEMemOperand& addr);
5203 
5204   // Prefetch words.
5205   void prfw(PrefetchOperation prfop,
5206             const PRegister& pg,
5207             const SVEMemOperand& addr);
5208 
5209   // Prefetch doublewords.
5210   void prfd(PrefetchOperation prfop,
5211             const PRegister& pg,
5212             const SVEMemOperand& addr);
5213 
5214   // Set condition flags for predicate.
5215   void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn);
5216 
5217   // Initialise predicate from named constraint.
5218   void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5219 
  // Initialise predicate from named constraint, setting the condition flags.
5221   void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL);
5222 
  // Unpack and widen the high half of a predicate.
5224   void punpkhi(const PRegisterWithLaneSize& pd,
5225                const PRegisterWithLaneSize& pn);
5226 
  // Unpack and widen the low half of a predicate.
5228   void punpklo(const PRegisterWithLaneSize& pd,
5229                const PRegisterWithLaneSize& pn);
5230 
5231   // Reverse bits (predicated).
5232   void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5233 
5234   // Read the first-fault register.
5235   void rdffr(const PRegisterWithLaneSize& pd);
5236 
  // Return predicate of successfully loaded elements.
5238   void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5239 
  // Return predicate of successfully loaded elements.
5241   void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg);
5242 
5243   // Read multiple of vector register size to scalar register.
5244   void rdvl(const Register& xd, int imm6);
5245 
5246   // Reverse all elements in a predicate.
5247   void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn);
5248 
5249   // Reverse all elements in a vector (unpredicated).
5250   void rev(const ZRegister& zd, const ZRegister& zn);
5251 
  // Reverse bytes within elements (predicated).
5253   void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5254 
  // Reverse halfwords within elements (predicated).
5256   void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5257 
  // Reverse words within elements (predicated).
5259   void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5260 
5261   // Signed absolute difference (predicated).
5262   void sabd(const ZRegister& zd,
5263             const PRegisterM& pg,
5264             const ZRegister& zn,
5265             const ZRegister& zm);
5266 
5267   // Signed add reduction to scalar.
5268   void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5269 
5270   // Signed integer convert to floating-point (predicated).
5271   void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5272 
5273   // Signed divide (predicated).
5274   void sdiv(const ZRegister& zd,
5275             const PRegisterM& pg,
5276             const ZRegister& zn,
5277             const ZRegister& zm);
5278 
5279   // Signed reversed divide (predicated).
5280   void sdivr(const ZRegister& zd,
5281              const PRegisterM& pg,
5282              const ZRegister& zn,
5283              const ZRegister& zm);
5284 
5285   // Signed dot product by indexed quadtuplet.
5286   void sdot(const ZRegister& zda,
5287             const ZRegister& zn,
5288             const ZRegister& zm,
5289             int index);
5290 
5291   // Signed dot product.
5292   void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5293 
5294   // Conditionally select elements from two predicates.
5295   void sel(const PRegisterWithLaneSize& pd,
5296            const PRegister& pg,
5297            const PRegisterWithLaneSize& pn,
5298            const PRegisterWithLaneSize& pm);
5299 
5300   // Conditionally select elements from two vectors.
5301   void sel(const ZRegister& zd,
5302            const PRegister& pg,
5303            const ZRegister& zn,
5304            const ZRegister& zm);
5305 
5306   // Initialise the first-fault register to all true.
5307   void setffr();
5308 
5309   // Signed maximum vectors (predicated).
5310   void smax(const ZRegister& zd,
5311             const PRegisterM& pg,
5312             const ZRegister& zn,
5313             const ZRegister& zm);
5314 
5315   // Signed maximum with immediate (unpredicated).
5316   void smax(const ZRegister& zd, const ZRegister& zn, int imm8);
5317 
5318   // Signed maximum reduction to scalar.
5319   void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5320 
5321   // Signed minimum vectors (predicated).
5322   void smin(const ZRegister& zd,
5323             const PRegisterM& pg,
5324             const ZRegister& zn,
5325             const ZRegister& zm);
5326 
5327   // Signed minimum with immediate (unpredicated).
5328   void smin(const ZRegister& zd, const ZRegister& zn, int imm8);
5329 
5330   // Signed minimum reduction to scalar.
5331   void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5332 
5333   // Signed multiply returning high half (predicated).
5334   void smulh(const ZRegister& zd,
5335              const PRegisterM& pg,
5336              const ZRegister& zn,
5337              const ZRegister& zm);
5338 
5339   // Splice two vectors under predicate control.
5340   void splice(const ZRegister& zd,
5341               const PRegister& pg,
5342               const ZRegister& zn,
5343               const ZRegister& zm);
5344 
5345   // Splice two vectors under predicate control (constructive).
5346   void splice_con(const ZRegister& zd,
5347                   const PRegister& pg,
5348                   const ZRegister& zn,
5349                   const ZRegister& zm);
5350 
5351   // Signed saturating add vectors (unpredicated).
5352   void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5353 
5354   // Signed saturating add immediate (unpredicated).
5355   void sqadd(const ZRegister& zd,
5356              const ZRegister& zn,
5357              int imm8,
5358              int shift = -1);
5359 
5360   // Signed saturating decrement scalar by multiple of 8-bit predicate
5361   // constraint element count.
5362   void sqdecb(const Register& xd,
5363               const Register& wn,
5364               int pattern,
5365               int multiplier);
5366 
5367   // Signed saturating decrement scalar by multiple of 8-bit predicate
5368   // constraint element count.
5369   void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5370 
5371   // Signed saturating decrement scalar by multiple of 64-bit predicate
5372   // constraint element count.
5373   void sqdecd(const Register& xd,
5374               const Register& wn,
5375               int pattern = SVE_ALL,
5376               int multiplier = 1);
5377 
5378   // Signed saturating decrement scalar by multiple of 64-bit predicate
5379   // constraint element count.
5380   void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5381 
5382   // Signed saturating decrement vector by multiple of 64-bit predicate
5383   // constraint element count.
5384   void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5385 
5386   // Signed saturating decrement scalar by multiple of 16-bit predicate
5387   // constraint element count.
5388   void sqdech(const Register& xd,
5389               const Register& wn,
5390               int pattern = SVE_ALL,
5391               int multiplier = 1);
5392 
5393   // Signed saturating decrement scalar by multiple of 16-bit predicate
5394   // constraint element count.
5395   void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5396 
5397   // Signed saturating decrement vector by multiple of 16-bit predicate
5398   // constraint element count.
5399   void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5400 
5401   // Signed saturating decrement scalar by active predicate element count.
5402   void sqdecp(const Register& xd,
5403               const PRegisterWithLaneSize& pg,
5404               const Register& wn);
5405 
5406   // Signed saturating decrement scalar by active predicate element count.
5407   void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg);
5408 
5409   // Signed saturating decrement vector by active predicate element count.
5410   void sqdecp(const ZRegister& zdn, const PRegister& pg);
5411 
5412   // Signed saturating decrement scalar by multiple of 32-bit predicate
5413   // constraint element count.
5414   void sqdecw(const Register& xd,
5415               const Register& wn,
5416               int pattern = SVE_ALL,
5417               int multiplier = 1);
5418 
5419   // Signed saturating decrement scalar by multiple of 32-bit predicate
5420   // constraint element count.
5421   void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5422 
5423   // Signed saturating decrement vector by multiple of 32-bit predicate
5424   // constraint element count.
5425   void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5426 
5427   // Signed saturating increment scalar by multiple of 8-bit predicate
5428   // constraint element count.
5429   void sqincb(const Register& xd,
5430               const Register& wn,
5431               int pattern = SVE_ALL,
5432               int multiplier = 1);
5433 
5434   // Signed saturating increment scalar by multiple of 8-bit predicate
5435   // constraint element count.
5436   void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5437 
5438   // Signed saturating increment scalar by multiple of 64-bit predicate
5439   // constraint element count.
5440   void sqincd(const Register& xd,
5441               const Register& wn,
5442               int pattern,
5443               int multiplier);
5444 
5445   // Signed saturating increment scalar by multiple of 64-bit predicate
5446   // constraint element count.
5447   void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5448 
5449   // Signed saturating increment vector by multiple of 64-bit predicate
5450   // constraint element count.
5451   void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5452 
5453   // Signed saturating increment scalar by multiple of 16-bit predicate
5454   // constraint element count.
5455   void sqinch(const Register& xd,
5456               const Register& wn,
5457               int pattern = SVE_ALL,
5458               int multiplier = 1);
5459 
5460   // Signed saturating increment scalar by multiple of 16-bit predicate
5461   // constraint element count.
5462   void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5463 
5464   // Signed saturating increment vector by multiple of 16-bit predicate
5465   // constraint element count.
5466   void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5467 
5468   // Signed saturating increment scalar by active predicate element count.
5469   void sqincp(const Register& xd,
5470               const PRegisterWithLaneSize& pg,
5471               const Register& wn);
5472 
5473   // Signed saturating increment scalar by active predicate element count.
5474   void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg);
5475 
5476   // Signed saturating increment vector by active predicate element count.
5477   void sqincp(const ZRegister& zdn, const PRegister& pg);
5478 
5479   // Signed saturating increment scalar by multiple of 32-bit predicate
5480   // constraint element count.
5481   void sqincw(const Register& xd,
5482               const Register& wn,
5483               int pattern = SVE_ALL,
5484               int multiplier = 1);
5485 
5486   // Signed saturating increment scalar by multiple of 32-bit predicate
5487   // constraint element count.
5488   void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5489 
5490   // Signed saturating increment vector by multiple of 32-bit predicate
5491   // constraint element count.
5492   void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5493 
5494   // Signed saturating subtract vectors (unpredicated).
5495   void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5496 
5497   // Signed saturating subtract immediate (unpredicated).
5498   void sqsub(const ZRegister& zd,
5499              const ZRegister& zn,
5500              int imm8,
5501              int shift = -1);
5502 
5503   // Contiguous/scatter store bytes from vector.
5504   void st1b(const ZRegister& zt,
5505             const PRegister& pg,
5506             const SVEMemOperand& addr);
5507 
5508   // Contiguous/scatter store halfwords from vector.
5509   void st1h(const ZRegister& zt,
5510             const PRegister& pg,
5511             const SVEMemOperand& addr);
5512 
5513   // Contiguous/scatter store words from vector.
5514   void st1w(const ZRegister& zt,
5515             const PRegister& pg,
5516             const SVEMemOperand& addr);
5517 
5518   // Contiguous/scatter store doublewords from vector.
5519   void st1d(const ZRegister& zt,
5520             const PRegister& pg,
5521             const SVEMemOperand& addr);
5522 
5523   // Contiguous store two-byte structures from two vectors.
5524   void st2b(const ZRegister& zt1,
5525             const ZRegister& zt2,
5526             const PRegister& pg,
5527             const SVEMemOperand& addr);
5528 
5529   // Contiguous store two-halfword structures from two vectors.
5530   void st2h(const ZRegister& zt1,
5531             const ZRegister& zt2,
5532             const PRegister& pg,
5533             const SVEMemOperand& addr);
5534 
5535   // Contiguous store two-word structures from two vectors.
5536   void st2w(const ZRegister& zt1,
5537             const ZRegister& zt2,
5538             const PRegister& pg,
5539             const SVEMemOperand& addr);
5540 
  // Contiguous store two-doubleword structures from two vectors.
  void st2d(const ZRegister& zt1,
            const ZRegister& zt2,
            const PRegister& pg,
            const SVEMemOperand& addr);
5546 
5547   // Contiguous store three-byte structures from three vectors.
5548   void st3b(const ZRegister& zt1,
5549             const ZRegister& zt2,
5550             const ZRegister& zt3,
5551             const PRegister& pg,
5552             const SVEMemOperand& addr);
5553 
5554   // Contiguous store three-halfword structures from three vectors.
5555   void st3h(const ZRegister& zt1,
5556             const ZRegister& zt2,
5557             const ZRegister& zt3,
5558             const PRegister& pg,
5559             const SVEMemOperand& addr);
5560 
5561   // Contiguous store three-word structures from three vectors.
5562   void st3w(const ZRegister& zt1,
5563             const ZRegister& zt2,
5564             const ZRegister& zt3,
5565             const PRegister& pg,
5566             const SVEMemOperand& addr);
5567 
5568   // Contiguous store three-doubleword structures from three vectors.
5569   void st3d(const ZRegister& zt1,
5570             const ZRegister& zt2,
5571             const ZRegister& zt3,
5572             const PRegister& pg,
5573             const SVEMemOperand& addr);
5574 
5575   // Contiguous store four-byte structures from four vectors.
5576   void st4b(const ZRegister& zt1,
5577             const ZRegister& zt2,
5578             const ZRegister& zt3,
5579             const ZRegister& zt4,
5580             const PRegister& pg,
5581             const SVEMemOperand& addr);
5582 
5583   // Contiguous store four-halfword structures from four vectors.
5584   void st4h(const ZRegister& zt1,
5585             const ZRegister& zt2,
5586             const ZRegister& zt3,
5587             const ZRegister& zt4,
5588             const PRegister& pg,
5589             const SVEMemOperand& addr);
5590 
5591   // Contiguous store four-word structures from four vectors.
5592   void st4w(const ZRegister& zt1,
5593             const ZRegister& zt2,
5594             const ZRegister& zt3,
5595             const ZRegister& zt4,
5596             const PRegister& pg,
5597             const SVEMemOperand& addr);
5598 
5599   // Contiguous store four-doubleword structures from four vectors.
5600   void st4d(const ZRegister& zt1,
5601             const ZRegister& zt2,
5602             const ZRegister& zt3,
5603             const ZRegister& zt4,
5604             const PRegister& pg,
5605             const SVEMemOperand& addr);
5606 
5607   // Contiguous store non-temporal bytes from vector.
5608   void stnt1b(const ZRegister& zt,
5609               const PRegister& pg,
5610               const SVEMemOperand& addr);
5611 
5612   // Contiguous store non-temporal halfwords from vector.
5613   void stnt1h(const ZRegister& zt,
5614               const PRegister& pg,
5615               const SVEMemOperand& addr);
5616 
5617   // Contiguous store non-temporal words from vector.
5618   void stnt1w(const ZRegister& zt,
5619               const PRegister& pg,
5620               const SVEMemOperand& addr);
5621 
5622   // Contiguous store non-temporal doublewords from vector.
5623   void stnt1d(const ZRegister& zt,
5624               const PRegister& pg,
5625               const SVEMemOperand& addr);
5626 
5627   // Store SVE predicate/vector register.
5628   void str(const CPURegister& rt, const SVEMemOperand& addr);
5629 
5630   // Subtract vectors (predicated).
5631   void sub(const ZRegister& zd,
5632            const PRegisterM& pg,
5633            const ZRegister& zn,
5634            const ZRegister& zm);
5635 
5636   // Subtract vectors (unpredicated).
5637   void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5638 
5639   // Subtract immediate (unpredicated).
5640   void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5641 
5642   // Reversed subtract vectors (predicated).
5643   void subr(const ZRegister& zd,
5644             const PRegisterM& pg,
5645             const ZRegister& zn,
5646             const ZRegister& zm);
5647 
5648   // Reversed subtract from immediate (unpredicated).
5649   void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1);
5650 
5651   // Signed unpack and extend half of vector.
5652   void sunpkhi(const ZRegister& zd, const ZRegister& zn);
5653 
5654   // Signed unpack and extend half of vector.
5655   void sunpklo(const ZRegister& zd, const ZRegister& zn);
5656 
5657   // Signed byte extend (predicated).
5658   void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5659 
5660   // Signed halfword extend (predicated).
5661   void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5662 
5663   // Signed word extend (predicated).
5664   void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5665 
5666   // Programmable table lookup/permute using vector of indices into a
5667   // vector.
5668   void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5669 
5670   // Interleave even or odd elements from two predicates.
5671   void trn1(const PRegisterWithLaneSize& pd,
5672             const PRegisterWithLaneSize& pn,
5673             const PRegisterWithLaneSize& pm);
5674 
5675   // Interleave even or odd elements from two vectors.
5676   void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5677 
5678   // Interleave even or odd elements from two predicates.
5679   void trn2(const PRegisterWithLaneSize& pd,
5680             const PRegisterWithLaneSize& pn,
5681             const PRegisterWithLaneSize& pm);
5682 
5683   // Interleave even or odd elements from two vectors.
5684   void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5685 
5686   // Unsigned absolute difference (predicated).
5687   void uabd(const ZRegister& zd,
5688             const PRegisterM& pg,
5689             const ZRegister& zn,
5690             const ZRegister& zm);
5691 
5692   // Unsigned add reduction to scalar.
5693   void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn);
5694 
5695   // Unsigned integer convert to floating-point (predicated).
5696   void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5697 
5698   // Unsigned divide (predicated).
5699   void udiv(const ZRegister& zd,
5700             const PRegisterM& pg,
5701             const ZRegister& zn,
5702             const ZRegister& zm);
5703 
5704   // Unsigned reversed divide (predicated).
5705   void udivr(const ZRegister& zd,
5706              const PRegisterM& pg,
5707              const ZRegister& zn,
5708              const ZRegister& zm);
5709 
5710   // Unsigned dot product by indexed quadtuplet.
5711   void udot(const ZRegister& zda,
5712             const ZRegister& zn,
5713             const ZRegister& zm,
5714             int index);
5715 
5716   // Unsigned dot product.
5717   void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5718 
5719   // Unsigned maximum vectors (predicated).
5720   void umax(const ZRegister& zd,
5721             const PRegisterM& pg,
5722             const ZRegister& zn,
5723             const ZRegister& zm);
5724 
5725   // Unsigned maximum with immediate (unpredicated).
5726   void umax(const ZRegister& zd, const ZRegister& zn, int imm8);
5727 
5728   // Unsigned maximum reduction to scalar.
5729   void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5730 
5731   // Unsigned minimum vectors (predicated).
5732   void umin(const ZRegister& zd,
5733             const PRegisterM& pg,
5734             const ZRegister& zn,
5735             const ZRegister& zm);
5736 
5737   // Unsigned minimum with immediate (unpredicated).
5738   void umin(const ZRegister& zd, const ZRegister& zn, int imm8);
5739 
5740   // Unsigned minimum reduction to scalar.
5741   void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn);
5742 
5743   // Unsigned multiply returning high half (predicated).
5744   void umulh(const ZRegister& zd,
5745              const PRegisterM& pg,
5746              const ZRegister& zn,
5747              const ZRegister& zm);
5748 
5749   // Unsigned saturating add vectors (unpredicated).
5750   void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5751 
5752   // Unsigned saturating add immediate (unpredicated).
5753   void uqadd(const ZRegister& zd,
5754              const ZRegister& zn,
5755              int imm8,
5756              int shift = -1);
5757 
5758   // Unsigned saturating decrement scalar by multiple of 8-bit predicate
5759   // constraint element count.
5760   void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5761 
5762   // Unsigned saturating decrement scalar by multiple of 64-bit predicate
5763   // constraint element count.
5764   void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5765 
5766   // Unsigned saturating decrement vector by multiple of 64-bit predicate
5767   // constraint element count.
5768   void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5769 
5770   // Unsigned saturating decrement scalar by multiple of 16-bit predicate
5771   // constraint element count.
5772   void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5773 
5774   // Unsigned saturating decrement vector by multiple of 16-bit predicate
5775   // constraint element count.
5776   void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5777 
5778   // Unsigned saturating decrement scalar by active predicate element count.
5779   void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg);
5780 
5781   // Unsigned saturating decrement vector by active predicate element count.
5782   void uqdecp(const ZRegister& zdn, const PRegister& pg);
5783 
5784   // Unsigned saturating decrement scalar by multiple of 32-bit predicate
5785   // constraint element count.
5786   void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5787 
5788   // Unsigned saturating decrement vector by multiple of 32-bit predicate
5789   // constraint element count.
5790   void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5791 
5792   // Unsigned saturating increment scalar by multiple of 8-bit predicate
5793   // constraint element count.
5794   void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5795 
5796   // Unsigned saturating increment scalar by multiple of 64-bit predicate
5797   // constraint element count.
5798   void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5799 
5800   // Unsigned saturating increment vector by multiple of 64-bit predicate
5801   // constraint element count.
5802   void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5803 
5804   // Unsigned saturating increment scalar by multiple of 16-bit predicate
5805   // constraint element count.
5806   void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5807 
5808   // Unsigned saturating increment vector by multiple of 16-bit predicate
5809   // constraint element count.
5810   void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5811 
5812   // Unsigned saturating increment scalar by active predicate element count.
5813   void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg);
5814 
5815   // Unsigned saturating increment vector by active predicate element count.
5816   void uqincp(const ZRegister& zdn, const PRegister& pg);
5817 
5818   // Unsigned saturating increment scalar by multiple of 32-bit predicate
5819   // constraint element count.
5820   void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1);
5821 
5822   // Unsigned saturating increment vector by multiple of 32-bit predicate
5823   // constraint element count.
5824   void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1);
5825 
5826   // Unsigned saturating subtract vectors (unpredicated).
5827   void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5828 
5829   // Unsigned saturating subtract immediate (unpredicated).
5830   void uqsub(const ZRegister& zd,
5831              const ZRegister& zn,
5832              int imm8,
5833              int shift = -1);
5834 
5835   // Unsigned unpack and extend half of vector.
5836   void uunpkhi(const ZRegister& zd, const ZRegister& zn);
5837 
5838   // Unsigned unpack and extend half of vector.
5839   void uunpklo(const ZRegister& zd, const ZRegister& zn);
5840 
5841   // Unsigned byte extend (predicated).
5842   void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5843 
5844   // Unsigned halfword extend (predicated).
5845   void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5846 
5847   // Unsigned word extend (predicated).
5848   void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
5849 
5850   // Concatenate even or odd elements from two predicates.
5851   void uzp1(const PRegisterWithLaneSize& pd,
5852             const PRegisterWithLaneSize& pn,
5853             const PRegisterWithLaneSize& pm);
5854 
5855   // Concatenate even or odd elements from two vectors.
5856   void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5857 
5858   // Concatenate even or odd elements from two predicates.
5859   void uzp2(const PRegisterWithLaneSize& pd,
5860             const PRegisterWithLaneSize& pn,
5861             const PRegisterWithLaneSize& pm);
5862 
5863   // Concatenate even or odd elements from two vectors.
5864   void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5865 
5866   // While incrementing signed scalar less than or equal to scalar.
5867   void whilele(const PRegisterWithLaneSize& pd,
5868                const Register& rn,
5869                const Register& rm);
5870 
5871   // While incrementing unsigned scalar lower than scalar.
5872   void whilelo(const PRegisterWithLaneSize& pd,
5873                const Register& rn,
5874                const Register& rm);
5875 
5876   // While incrementing unsigned scalar lower or same as scalar.
5877   void whilels(const PRegisterWithLaneSize& pd,
5878                const Register& rn,
5879                const Register& rm);
5880 
5881   // While incrementing signed scalar less than scalar.
5882   void whilelt(const PRegisterWithLaneSize& pd,
5883                const Register& rn,
5884                const Register& rm);
5885 
5886   // Write the first-fault register.
5887   void wrffr(const PRegisterWithLaneSize& pn);
5888 
5889   // Interleave elements from two half predicates.
5890   void zip1(const PRegisterWithLaneSize& pd,
5891             const PRegisterWithLaneSize& pn,
5892             const PRegisterWithLaneSize& pm);
5893 
5894   // Interleave elements from two half vectors.
5895   void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5896 
5897   // Interleave elements from two half predicates.
5898   void zip2(const PRegisterWithLaneSize& pd,
5899             const PRegisterWithLaneSize& pn,
5900             const PRegisterWithLaneSize& pm);
5901 
5902   // Interleave elements from two half vectors.
5903   void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5904 
5905   // Add with carry long (bottom).
5906   void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5907 
5908   // Add with carry long (top).
5909   void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
5910 
5911   // Add narrow high part (bottom).
5912   void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5913 
5914   // Add narrow high part (top).
5915   void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5916 
5917   // Add pairwise.
5918   void addp(const ZRegister& zd,
5919             const PRegisterM& pg,
5920             const ZRegister& zn,
5921             const ZRegister& zm);
5922 
5923   // Bitwise clear and exclusive OR.
5924   void bcax(const ZRegister& zd,
5925             const ZRegister& zn,
5926             const ZRegister& zm,
5927             const ZRegister& zk);
5928 
5929   // Scatter lower bits into positions selected by bitmask.
5930   void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5931 
5932   // Gather lower bits from positions selected by bitmask.
5933   void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5934 
5935   // Group bits to right or left as selected by bitmask.
5936   void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5937 
5938   // Bitwise select.
5939   void bsl(const ZRegister& zd,
5940            const ZRegister& zn,
5941            const ZRegister& zm,
5942            const ZRegister& zk);
5943 
5944   // Bitwise select with first input inverted.
5945   void bsl1n(const ZRegister& zd,
5946              const ZRegister& zn,
5947              const ZRegister& zm,
5948              const ZRegister& zk);
5949 
5950   // Bitwise select with second input inverted.
5951   void bsl2n(const ZRegister& zd,
5952              const ZRegister& zn,
5953              const ZRegister& zm,
5954              const ZRegister& zk);
5955 
5956   // Complex integer add with rotate.
5957   void cadd(const ZRegister& zd,
5958             const ZRegister& zn,
5959             const ZRegister& zm,
5960             int rot);
5961 
5962   // Complex integer dot product (indexed).
5963   void cdot(const ZRegister& zda,
5964             const ZRegister& zn,
5965             const ZRegister& zm,
5966             int index,
5967             int rot);
5968 
5969   // Complex integer dot product.
5970   void cdot(const ZRegister& zda,
5971             const ZRegister& zn,
5972             const ZRegister& zm,
5973             int rot);
5974 
5975   // Complex integer multiply-add with rotate (indexed).
5976   void cmla(const ZRegister& zda,
5977             const ZRegister& zn,
5978             const ZRegister& zm,
5979             int index,
5980             int rot);
5981 
5982   // Complex integer multiply-add with rotate.
5983   void cmla(const ZRegister& zda,
5984             const ZRegister& zn,
5985             const ZRegister& zm,
5986             int rot);
5987 
5988   // Bitwise exclusive OR of three vectors.
5989   void eor3(const ZRegister& zd,
5990             const ZRegister& zn,
5991             const ZRegister& zm,
5992             const ZRegister& zk);
5993 
5994   // Interleaving exclusive OR (bottom, top).
5995   void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5996 
5997   // Interleaving exclusive OR (top, bottom).
5998   void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
5999 
6000   // Floating-point add pairwise.
6001   void faddp(const ZRegister& zd,
6002              const PRegisterM& pg,
6003              const ZRegister& zn,
6004              const ZRegister& zm);
6005 
6006   // Floating-point up convert long (top, predicated).
6007   void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6008 
6009   // Floating-point down convert and narrow (top, predicated).
6010   void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6011 
6012   // Floating-point down convert, rounding to odd (predicated).
6013   void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6014 
6015   // Floating-point down convert, rounding to odd (top, predicated).
6016   void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6017 
6018   // Floating-point base 2 logarithm as integer.
6019   void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6020 
6021   // Floating-point maximum number pairwise.
6022   void fmaxnmp(const ZRegister& zd,
6023                const PRegisterM& pg,
6024                const ZRegister& zn,
6025                const ZRegister& zm);
6026 
6027   // Floating-point maximum pairwise.
6028   void fmaxp(const ZRegister& zd,
6029              const PRegisterM& pg,
6030              const ZRegister& zn,
6031              const ZRegister& zm);
6032 
6033   // Floating-point minimum number pairwise.
6034   void fminnmp(const ZRegister& zd,
6035                const PRegisterM& pg,
6036                const ZRegister& zn,
6037                const ZRegister& zm);
6038 
6039   // Floating-point minimum pairwise.
6040   void fminp(const ZRegister& zd,
6041              const PRegisterM& pg,
6042              const ZRegister& zn,
6043              const ZRegister& zm);
6044 
6045   // Half-precision floating-point multiply-add long to single-precision
6046   // (bottom).
6047   void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6048 
6049   // Half-precision floating-point multiply-add long to single-precision
6050   // (top).
6051   void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6052 
6053   // Half-precision floating-point multiply-subtract long from
6054   // single-precision (bottom).
6055   void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6056 
6057   // Half-precision floating-point multiply-subtract long from
6058   // single-precision (top, indexed).
6059   void fmlslt(const ZRegister& zda,
6060               const ZRegister& zn,
6061               const ZRegister& zm,
6062               int index);
6063 
6064   // Half-precision floating-point multiply-add long to single-precision
6065   // (bottom, indexed).
6066   void fmlalb(const ZRegister& zda,
6067               const ZRegister& zn,
6068               const ZRegister& zm,
6069               int index);
6070 
6071   // Half-precision floating-point multiply-add long to single-precision
6072   // (top, indexed).
6073   void fmlalt(const ZRegister& zda,
6074               const ZRegister& zn,
6075               const ZRegister& zm,
6076               int index);
6077 
6078   // Half-precision floating-point multiply-subtract long from
6079   // single-precision (bottom, indexed).
6080   void fmlslb(const ZRegister& zda,
6081               const ZRegister& zn,
6082               const ZRegister& zm,
6083               int index);
6084 
6085   // Half-precision floating-point multiply-subtract long from
6086   // single-precision (top).
6087   void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6088 
6089   // Count matching elements in vector.
6090   void histcnt(const ZRegister& zd,
6091                const PRegisterZ& pg,
6092                const ZRegister& zn,
6093                const ZRegister& zm);
6094 
6095   // Count matching elements in vector segments.
6096   void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6097 
6098   // Gather load non-temporal signed bytes.
6099   void ldnt1sb(const ZRegister& zt,
6100                const PRegisterZ& pg,
6101                const SVEMemOperand& addr);
6102 
6103   // Gather load non-temporal signed halfwords.
6104   void ldnt1sh(const ZRegister& zt,
6105                const PRegisterZ& pg,
6106                const SVEMemOperand& addr);
6107 
6108   // Gather load non-temporal signed words.
6109   void ldnt1sw(const ZRegister& zt,
6110                const PRegisterZ& pg,
6111                const SVEMemOperand& addr);
6112 
6113   // Detect any matching elements, setting the condition flags.
6114   void match(const PRegisterWithLaneSize& pd,
6115              const PRegisterZ& pg,
6116              const ZRegister& zn,
6117              const ZRegister& zm);
6118 
6119   // Multiply-add to accumulator (indexed).
6120   void mla(const ZRegister& zda,
6121            const ZRegister& zn,
6122            const ZRegister& zm,
6123            int index);
6124 
6125   // Multiply-subtract from accumulator (indexed).
6126   void mls(const ZRegister& zda,
6127            const ZRegister& zn,
6128            const ZRegister& zm,
6129            int index);
6130 
6131   // Multiply (indexed).
6132   void mul(const ZRegister& zd,
6133            const ZRegister& zn,
6134            const ZRegister& zm,
6135            int index);
6136 
6137   // Multiply vectors (unpredicated).
6138   void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6139 
6140   // Bitwise inverted select.
6141   void nbsl(const ZRegister& zd,
6142             const ZRegister& zn,
6143             const ZRegister& zm,
6144             const ZRegister& zk);
6145 
6146   // Detect no matching elements, setting the condition flags.
6147   void nmatch(const PRegisterWithLaneSize& pd,
6148               const PRegisterZ& pg,
6149               const ZRegister& zn,
6150               const ZRegister& zm);
6151 
6152   // Polynomial multiply vectors (unpredicated).
6153   void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6154 
6155   // Polynomial multiply long (bottom).
6156   void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6157 
6158   // Polynomial multiply long (top).
6159   void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6160 
6161   // Rounding add narrow high part (bottom).
6162   void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6163 
6164   // Rounding add narrow high part (top).
6165   void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6166 
6167   // Rounding shift right narrow by immediate (bottom).
6168   void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6169 
6170   // Rounding shift right narrow by immediate (top).
6171   void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6172 
6173   // Rounding subtract narrow high part (bottom).
6174   void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6175 
6176   // Rounding subtract narrow high part (top).
6177   void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6178 
6179   // Signed absolute difference and accumulate.
6180   void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6181 
6182   // Signed absolute difference and accumulate long (bottom).
6183   void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6184 
6185   // Signed absolute difference and accumulate long (top).
6186   void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6187 
6188   // Signed absolute difference long (bottom).
6189   void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6190 
6191   // Signed absolute difference long (top).
6192   void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6193 
6194   // Signed add and accumulate long pairwise.
6195   void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6196 
6197   // Signed add long (bottom).
6198   void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6199 
6200   // Signed add long (bottom + top).
6201   void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6202 
6203   // Signed add long (top).
6204   void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6205 
6206   // Signed add wide (bottom).
6207   void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6208 
6209   // Signed add wide (top).
6210   void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6211 
6212   // Subtract with carry long (bottom).
6213   void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6214 
6215   // Subtract with carry long (top).
6216   void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6217 
6218   // Signed halving addition.
6219   void shadd(const ZRegister& zd,
6220              const PRegisterM& pg,
6221              const ZRegister& zn,
6222              const ZRegister& zm);
6223 
6224   // Shift right narrow by immediate (bottom).
6225   void shrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6226 
6227   // Shift right narrow by immediate (top).
6228   void shrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6229 
6230   // Signed halving subtract.
6231   void shsub(const ZRegister& zd,
6232              const PRegisterM& pg,
6233              const ZRegister& zn,
6234              const ZRegister& zm);
6235 
6236   // Signed halving subtract reversed vectors.
6237   void shsubr(const ZRegister& zd,
6238               const PRegisterM& pg,
6239               const ZRegister& zn,
6240               const ZRegister& zm);
6241 
6242   // Shift left and insert (immediate).
6243   void sli(const ZRegister& zd, const ZRegister& zn, int shift);
6244 
6245   // Signed maximum pairwise.
6246   void smaxp(const ZRegister& zd,
6247              const PRegisterM& pg,
6248              const ZRegister& zn,
6249              const ZRegister& zm);
6250 
6251   // Signed minimum pairwise.
6252   void sminp(const ZRegister& zd,
6253              const PRegisterM& pg,
6254              const ZRegister& zn,
6255              const ZRegister& zm);
6256 
6257   // Signed multiply-add long to accumulator (bottom, indexed).
6258   void smlalb(const ZRegister& zda,
6259               const ZRegister& zn,
6260               const ZRegister& zm,
6261               int index);
6262 
6263   // Signed multiply-add long to accumulator (bottom).
6264   void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6265 
6266   // Signed multiply-add long to accumulator (top, indexed).
6267   void smlalt(const ZRegister& zda,
6268               const ZRegister& zn,
6269               const ZRegister& zm,
6270               int index);
6271 
6272   // Signed multiply-add long to accumulator (top).
6273   void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6274 
6275   // Signed multiply-subtract long from accumulator (bottom, indexed).
6276   void smlslb(const ZRegister& zda,
6277               const ZRegister& zn,
6278               const ZRegister& zm,
6279               int index);
6280 
6281   // Signed multiply-subtract long from accumulator (bottom).
6282   void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6283 
6284   // Signed multiply-subtract long from accumulator (top, indexed).
6285   void smlslt(const ZRegister& zda,
6286               const ZRegister& zn,
6287               const ZRegister& zm,
6288               int index);
6289 
6290   // Signed multiply-subtract long from accumulator (top).
6291   void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6292 
6293   // Signed multiply returning high half (unpredicated).
6294   void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6295 
6296   // Signed multiply long (bottom, indexed).
6297   void smullb(const ZRegister& zd,
6298               const ZRegister& zn,
6299               const ZRegister& zm,
6300               int index);
6301 
6302   // Signed multiply long (bottom).
6303   void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6304 
6305   // Signed multiply long (top, indexed).
6306   void smullt(const ZRegister& zd,
6307               const ZRegister& zn,
6308               const ZRegister& zm,
6309               int index);
6310 
6311   // Signed multiply long (top).
6312   void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6313 
6314   // Signed saturating absolute value.
6315   void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6316 
6317   // Signed saturating addition (predicated).
6318   void sqadd(const ZRegister& zd,
6319              const PRegisterM& pg,
6320              const ZRegister& zn,
6321              const ZRegister& zm);
6322 
6323   // Saturating complex integer add with rotate.
6324   void sqcadd(const ZRegister& zd,
6325               const ZRegister& zn,
6326               const ZRegister& zm,
6327               int rot);
6328 
6329   // Signed saturating doubling multiply-add long to accumulator (bottom,
6330   // indexed).
6331   void sqdmlalb(const ZRegister& zda,
6332                 const ZRegister& zn,
6333                 const ZRegister& zm,
6334                 int index);
6335 
6336   // Signed saturating doubling multiply-add long to accumulator (bottom).
6337   void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6338 
6339   // Signed saturating doubling multiply-add long to accumulator (bottom x
6340   // top).
6341   void sqdmlalbt(const ZRegister& zda,
6342                  const ZRegister& zn,
6343                  const ZRegister& zm);
6344 
6345   // Signed saturating doubling multiply-add long to accumulator (top,
6346   // indexed).
6347   void sqdmlalt(const ZRegister& zda,
6348                 const ZRegister& zn,
6349                 const ZRegister& zm,
6350                 int index);
6351 
6352   // Signed saturating doubling multiply-add long to accumulator (top).
6353   void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6354 
6355   // Signed saturating doubling multiply-subtract long from accumulator
6356   // (bottom, indexed).
6357   void sqdmlslb(const ZRegister& zda,
6358                 const ZRegister& zn,
6359                 const ZRegister& zm,
6360                 int index);
6361 
6362   // Signed saturating doubling multiply-subtract long from accumulator
6363   // (bottom).
6364   void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6365 
6366   // Signed saturating doubling multiply-subtract long from accumulator
6367   // (bottom x top).
6368   void sqdmlslbt(const ZRegister& zda,
6369                  const ZRegister& zn,
6370                  const ZRegister& zm);
6371 
6372   // Signed saturating doubling multiply-subtract long from accumulator
6373   // (top, indexed).
6374   void sqdmlslt(const ZRegister& zda,
6375                 const ZRegister& zn,
6376                 const ZRegister& zm,
6377                 int index);
6378 
6379   // Signed saturating doubling multiply-subtract long from accumulator
6380   // (top).
6381   void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6382 
6383   // Signed saturating doubling multiply high (indexed).
6384   void sqdmulh(const ZRegister& zd,
6385                const ZRegister& zn,
6386                const ZRegister& zm,
6387                int index);
6388 
6389   // Signed saturating doubling multiply high (unpredicated).
6390   void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6391 
6392   // Signed saturating doubling multiply long (bottom, indexed).
6393   void sqdmullb(const ZRegister& zd,
6394                 const ZRegister& zn,
6395                 const ZRegister& zm,
6396                 int index);
6397 
6398   // Signed saturating doubling multiply long (bottom).
6399   void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6400 
6401   // Signed saturating doubling multiply long (top, indexed).
6402   void sqdmullt(const ZRegister& zd,
6403                 const ZRegister& zn,
6404                 const ZRegister& zm,
6405                 int index);
6406 
6407   // Signed saturating doubling multiply long (top).
6408   void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6409 
6410   // Signed saturating negate.
6411   void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6412 
6413   // Saturating rounding doubling complex integer multiply-add high with
6414   // rotate (indexed).
6415   void sqrdcmlah(const ZRegister& zda,
6416                  const ZRegister& zn,
6417                  const ZRegister& zm,
6418                  int index,
6419                  int rot);
6420 
6421   // Saturating rounding doubling complex integer multiply-add high with
6422   // rotate.
6423   void sqrdcmlah(const ZRegister& zda,
6424                  const ZRegister& zn,
6425                  const ZRegister& zm,
6426                  int rot);
6427 
6428   // Signed saturating rounding doubling multiply-add high to accumulator
6429   // (indexed).
6430   void sqrdmlah(const ZRegister& zda,
6431                 const ZRegister& zn,
6432                 const ZRegister& zm,
6433                 int index);
6434 
6435   // Signed saturating rounding doubling multiply-add high to accumulator
6436   // (unpredicated).
6437   void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6438 
6439   // Signed saturating rounding doubling multiply-subtract high from
6440   // accumulator (indexed).
6441   void sqrdmlsh(const ZRegister& zda,
6442                 const ZRegister& zn,
6443                 const ZRegister& zm,
6444                 int index);
6445 
6446   // Signed saturating rounding doubling multiply-subtract high from
6447   // accumulator (unpredicated).
6448   void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6449 
6450   // Signed saturating rounding doubling multiply high (indexed).
6451   void sqrdmulh(const ZRegister& zd,
6452                 const ZRegister& zn,
6453                 const ZRegister& zm,
6454                 int index);
6455 
6456   // Signed saturating rounding doubling multiply high (unpredicated).
6457   void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6458 
6459   // Signed saturating rounding shift left by vector (predicated).
6460   void sqrshl(const ZRegister& zd,
6461               const PRegisterM& pg,
6462               const ZRegister& zn,
6463               const ZRegister& zm);
6464 
6465   // Signed saturating rounding shift left reversed vectors (predicated).
6466   void sqrshlr(const ZRegister& zd,
6467                const PRegisterM& pg,
6468                const ZRegister& zn,
6469                const ZRegister& zm);
6470 
6471   // Signed saturating rounding shift right narrow by immediate (bottom).
6472   void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6473 
6474   // Signed saturating rounding shift right narrow by immediate (top).
6475   void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6476 
6477   // Signed saturating rounding shift right unsigned narrow by immediate
6478   // (bottom).
6479   void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6480 
6481   // Signed saturating rounding shift right unsigned narrow by immediate
6482   // (top).
6483   void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6484 
6485   // Signed saturating shift left by immediate.
6486   void sqshl(const ZRegister& zd,
6487              const PRegisterM& pg,
6488              const ZRegister& zn,
6489              int shift);
6490 
6491   // Signed saturating shift left by vector (predicated).
6492   void sqshl(const ZRegister& zd,
6493              const PRegisterM& pg,
6494              const ZRegister& zn,
6495              const ZRegister& zm);
6496 
6497   // Signed saturating shift left reversed vectors (predicated).
6498   void sqshlr(const ZRegister& zd,
6499               const PRegisterM& pg,
6500               const ZRegister& zn,
6501               const ZRegister& zm);
6502 
6503   // Signed saturating shift left unsigned by immediate.
6504   void sqshlu(const ZRegister& zd,
6505               const PRegisterM& pg,
6506               const ZRegister& zn,
6507               int shift);
6508 
6509   // Signed saturating shift right narrow by immediate (bottom).
6510   void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6511 
6512   // Signed saturating shift right narrow by immediate (top).
6513   void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6514 
6515   // Signed saturating shift right unsigned narrow by immediate (bottom).
6516   void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift);
6517 
6518   // Signed saturating shift right unsigned narrow by immediate (top).
6519   void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift);
6520 
6521   // Signed saturating subtraction (predicated).
6522   void sqsub(const ZRegister& zd,
6523              const PRegisterM& pg,
6524              const ZRegister& zn,
6525              const ZRegister& zm);
6526 
6527   // Signed saturating subtraction reversed vectors (predicated).
6528   void sqsubr(const ZRegister& zd,
6529               const PRegisterM& pg,
6530               const ZRegister& zn,
6531               const ZRegister& zm);
6532 
6533   // Signed saturating extract narrow (bottom).
6534   void sqxtnb(const ZRegister& zd, const ZRegister& zn);
6535 
6536   // Signed saturating extract narrow (top).
6537   void sqxtnt(const ZRegister& zd, const ZRegister& zn);
6538 
6539   // Signed saturating unsigned extract narrow (bottom).
6540   void sqxtunb(const ZRegister& zd, const ZRegister& zn);
6541 
6542   // Signed saturating unsigned extract narrow (top).
6543   void sqxtunt(const ZRegister& zd, const ZRegister& zn);
6544 
6545   // Signed rounding halving addition.
6546   void srhadd(const ZRegister& zd,
6547               const PRegisterM& pg,
6548               const ZRegister& zn,
6549               const ZRegister& zm);
6550 
6551   // Shift right and insert (immediate).
6552   void sri(const ZRegister& zd, const ZRegister& zn, int shift);
6553 
6554   // Signed rounding shift left by vector (predicated).
6555   void srshl(const ZRegister& zd,
6556              const PRegisterM& pg,
6557              const ZRegister& zn,
6558              const ZRegister& zm);
6559 
6560   // Signed rounding shift left reversed vectors (predicated).
6561   void srshlr(const ZRegister& zd,
6562               const PRegisterM& pg,
6563               const ZRegister& zn,
6564               const ZRegister& zm);
6565 
6566   // Signed rounding shift right by immediate.
6567   void srshr(const ZRegister& zd,
6568              const PRegisterM& pg,
6569              const ZRegister& zn,
6570              int shift);
6571 
6572   // Signed rounding shift right and accumulate (immediate).
6573   void srsra(const ZRegister& zda, const ZRegister& zn, int shift);
6574 
6575   // Signed shift left long by immediate (bottom).
6576   void sshllb(const ZRegister& zd, const ZRegister& zn, int shift);
6577 
6578   // Signed shift left long by immediate (top).
6579   void sshllt(const ZRegister& zd, const ZRegister& zn, int shift);
6580 
6581   // Signed shift right and accumulate (immediate).
6582   void ssra(const ZRegister& zda, const ZRegister& zn, int shift);
6583 
6584   // Signed subtract long (bottom).
6585   void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6586 
6587   // Signed subtract long (bottom - top).
6588   void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6589 
6590   // Signed subtract long (top).
6591   void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6592 
6593   // Signed subtract long (top - bottom).
6594   void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6595 
6596   // Signed subtract wide (bottom).
6597   void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6598 
6599   // Signed subtract wide (top).
6600   void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6601 
6602   // Subtract narrow high part (bottom).
6603   void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6604 
6605   // Subtract narrow high part (top).
6606   void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6607 
6608   // Signed saturating addition of unsigned value.
6609   void suqadd(const ZRegister& zd,
6610               const PRegisterM& pg,
6611               const ZRegister& zn,
6612               const ZRegister& zm);
6613 
  // Programmable table lookup in one or two vector tables (zeroing).
6615   void tbl(const ZRegister& zd,
6616            const ZRegister& zn1,
6617            const ZRegister& zn2,
6618            const ZRegister& zm);
6619 
  // Programmable table lookup in a single vector table (merging).
6621   void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6622 
6623   // Unsigned absolute difference and accumulate.
6624   void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6625 
6626   // Unsigned absolute difference and accumulate long (bottom).
6627   void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6628 
6629   // Unsigned absolute difference and accumulate long (top).
6630   void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6631 
6632   // Unsigned absolute difference long (bottom).
6633   void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6634 
6635   // Unsigned absolute difference long (top).
6636   void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6637 
6638   // Unsigned add and accumulate long pairwise.
6639   void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn);
6640 
6641   // Unsigned add long (bottom).
6642   void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6643 
6644   // Unsigned add long (top).
6645   void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6646 
6647   // Unsigned add wide (bottom).
6648   void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6649 
6650   // Unsigned add wide (top).
6651   void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6652 
6653   // Unsigned halving addition.
6654   void uhadd(const ZRegister& zd,
6655              const PRegisterM& pg,
6656              const ZRegister& zn,
6657              const ZRegister& zm);
6658 
6659   // Unsigned halving subtract.
6660   void uhsub(const ZRegister& zd,
6661              const PRegisterM& pg,
6662              const ZRegister& zn,
6663              const ZRegister& zm);
6664 
6665   // Unsigned halving subtract reversed vectors.
6666   void uhsubr(const ZRegister& zd,
6667               const PRegisterM& pg,
6668               const ZRegister& zn,
6669               const ZRegister& zm);
6670 
6671   // Unsigned maximum pairwise.
6672   void umaxp(const ZRegister& zd,
6673              const PRegisterM& pg,
6674              const ZRegister& zn,
6675              const ZRegister& zm);
6676 
6677   // Unsigned minimum pairwise.
6678   void uminp(const ZRegister& zd,
6679              const PRegisterM& pg,
6680              const ZRegister& zn,
6681              const ZRegister& zm);
6682 
6683   // Unsigned multiply-add long to accumulator (bottom, indexed).
6684   void umlalb(const ZRegister& zda,
6685               const ZRegister& zn,
6686               const ZRegister& zm,
6687               int index);
6688 
6689   // Unsigned multiply-add long to accumulator (bottom).
6690   void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6691 
6692   // Unsigned multiply-add long to accumulator (top, indexed).
6693   void umlalt(const ZRegister& zda,
6694               const ZRegister& zn,
6695               const ZRegister& zm,
6696               int index);
6697 
6698   // Unsigned multiply-add long to accumulator (top).
6699   void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6700 
6701   // Unsigned multiply-subtract long from accumulator (bottom, indexed).
6702   void umlslb(const ZRegister& zda,
6703               const ZRegister& zn,
6704               const ZRegister& zm,
6705               int index);
6706 
6707   // Unsigned multiply-subtract long from accumulator (bottom).
6708   void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6709 
6710   // Unsigned multiply-subtract long from accumulator (top, indexed).
6711   void umlslt(const ZRegister& zda,
6712               const ZRegister& zn,
6713               const ZRegister& zm,
6714               int index);
6715 
6716   // Unsigned multiply-subtract long from accumulator (top).
6717   void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6718 
6719   // Unsigned multiply returning high half (unpredicated).
6720   void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6721 
6722   // Unsigned multiply long (bottom, indexed).
6723   void umullb(const ZRegister& zd,
6724               const ZRegister& zn,
6725               const ZRegister& zm,
6726               int index);
6727 
6728   // Unsigned multiply long (bottom).
6729   void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6730 
6731   // Unsigned multiply long (top, indexed).
6732   void umullt(const ZRegister& zd,
6733               const ZRegister& zn,
6734               const ZRegister& zm,
6735               int index);
6736 
6737   // Unsigned multiply long (top).
6738   void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6739 
6740   // Unsigned saturating addition (predicated).
6741   void uqadd(const ZRegister& zd,
6742              const PRegisterM& pg,
6743              const ZRegister& zn,
6744              const ZRegister& zm);
6745 
6746   // Unsigned saturating rounding shift left by vector (predicated).
6747   void uqrshl(const ZRegister& zd,
6748               const PRegisterM& pg,
6749               const ZRegister& zn,
6750               const ZRegister& zm);
6751 
6752   // Unsigned saturating rounding shift left reversed vectors (predicated).
6753   void uqrshlr(const ZRegister& zd,
6754                const PRegisterM& pg,
6755                const ZRegister& zn,
6756                const ZRegister& zm);
6757 
6758   // Unsigned saturating rounding shift right narrow by immediate (bottom).
6759   void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6760 
6761   // Unsigned saturating rounding shift right narrow by immediate (top).
6762   void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6763 
6764   // Unsigned saturating shift left by immediate.
6765   void uqshl(const ZRegister& zd,
6766              const PRegisterM& pg,
6767              const ZRegister& zn,
6768              int shift);
6769 
6770   // Unsigned saturating shift left by vector (predicated).
6771   void uqshl(const ZRegister& zd,
6772              const PRegisterM& pg,
6773              const ZRegister& zn,
6774              const ZRegister& zm);
6775 
6776   // Unsigned saturating shift left reversed vectors (predicated).
6777   void uqshlr(const ZRegister& zd,
6778               const PRegisterM& pg,
6779               const ZRegister& zn,
6780               const ZRegister& zm);
6781 
6782   // Unsigned saturating shift right narrow by immediate (bottom).
6783   void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift);
6784 
6785   // Unsigned saturating shift right narrow by immediate (top).
6786   void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift);
6787 
6788   // Unsigned saturating subtraction (predicated).
6789   void uqsub(const ZRegister& zd,
6790              const PRegisterM& pg,
6791              const ZRegister& zn,
6792              const ZRegister& zm);
6793 
6794   // Unsigned saturating subtraction reversed vectors (predicated).
6795   void uqsubr(const ZRegister& zd,
6796               const PRegisterM& pg,
6797               const ZRegister& zn,
6798               const ZRegister& zm);
6799 
6800   // Unsigned saturating extract narrow (bottom).
6801   void uqxtnb(const ZRegister& zd, const ZRegister& zn);
6802 
6803   // Unsigned saturating extract narrow (top).
6804   void uqxtnt(const ZRegister& zd, const ZRegister& zn);
6805 
6806   // Unsigned reciprocal estimate (predicated).
6807   void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6808 
6809   // Unsigned rounding halving addition.
6810   void urhadd(const ZRegister& zd,
6811               const PRegisterM& pg,
6812               const ZRegister& zn,
6813               const ZRegister& zm);
6814 
6815   // Unsigned rounding shift left by vector (predicated).
6816   void urshl(const ZRegister& zd,
6817              const PRegisterM& pg,
6818              const ZRegister& zn,
6819              const ZRegister& zm);
6820 
6821   // Unsigned rounding shift left reversed vectors (predicated).
6822   void urshlr(const ZRegister& zd,
6823               const PRegisterM& pg,
6824               const ZRegister& zn,
6825               const ZRegister& zm);
6826 
6827   // Unsigned rounding shift right by immediate.
6828   void urshr(const ZRegister& zd,
6829              const PRegisterM& pg,
6830              const ZRegister& zn,
6831              int shift);
6832 
6833   // Unsigned reciprocal square root estimate (predicated).
6834   void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn);
6835 
6836   // Unsigned rounding shift right and accumulate (immediate).
6837   void ursra(const ZRegister& zda, const ZRegister& zn, int shift);
6838 
6839   // Unsigned shift left long by immediate (bottom).
6840   void ushllb(const ZRegister& zd, const ZRegister& zn, int shift);
6841 
6842   // Unsigned shift left long by immediate (top).
6843   void ushllt(const ZRegister& zd, const ZRegister& zn, int shift);
6844 
6845   // Unsigned saturating addition of signed value.
6846   void usqadd(const ZRegister& zd,
6847               const PRegisterM& pg,
6848               const ZRegister& zn,
6849               const ZRegister& zm);
6850 
6851   // Unsigned shift right and accumulate (immediate).
6852   void usra(const ZRegister& zda, const ZRegister& zn, int shift);
6853 
6854   // Unsigned subtract long (bottom).
6855   void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6856 
6857   // Unsigned subtract long (top).
6858   void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6859 
6860   // Unsigned subtract wide (bottom).
6861   void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6862 
6863   // Unsigned subtract wide (top).
6864   void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm);
6865 
6866   // While decrementing signed scalar greater than or equal to scalar.
6867   void whilege(const PRegisterWithLaneSize& pd,
6868                const Register& rn,
6869                const Register& rm);
6870 
6871   // While decrementing signed scalar greater than scalar.
6872   void whilegt(const PRegisterWithLaneSize& pd,
6873                const Register& rn,
6874                const Register& rm);
6875 
6876   // While decrementing unsigned scalar higher than scalar.
6877   void whilehi(const PRegisterWithLaneSize& pd,
6878                const Register& rn,
6879                const Register& rm);
6880 
6881   // While decrementing unsigned scalar higher or same as scalar.
6882   void whilehs(const PRegisterWithLaneSize& pd,
6883                const Register& rn,
6884                const Register& rm);
6885 
6886   // While free of read-after-write conflicts.
6887   void whilerw(const PRegisterWithLaneSize& pd,
6888                const Register& rn,
6889                const Register& rm);
6890 
6891   // While free of write-after-read/write conflicts.
6892   void whilewr(const PRegisterWithLaneSize& pd,
6893                const Register& rn,
6894                const Register& rm);
6895 
6896   // Bitwise exclusive OR and rotate right by immediate.
6897   void xar(const ZRegister& zd,
6898            const ZRegister& zn,
6899            const ZRegister& zm,
6900            int shift);
6901 
6902   // Floating-point matrix multiply-accumulate.
6903   void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6904 
6905   // Signed integer matrix multiply-accumulate.
6906   void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6907 
6908   // Unsigned by signed integer matrix multiply-accumulate.
6909   void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6910 
6911   // Unsigned integer matrix multiply-accumulate.
6912   void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6913 
6914   // Unsigned by signed integer dot product.
6915   void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
6916 
6917   // Unsigned by signed integer indexed dot product.
6918   void usdot(const ZRegister& zda,
6919              const ZRegister& zn,
6920              const ZRegister& zm,
6921              int index);
6922 
6923   // Signed by unsigned integer indexed dot product.
6924   void sudot(const ZRegister& zda,
6925              const ZRegister& zn,
6926              const ZRegister& zm,
6927              int index);
6928 
6929   // Emit generic instructions.
6930 
6931   // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)6932   void dci(Instr raw_inst) { Emit(raw_inst); }
6933 
6934   // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)6935   void dc32(uint32_t data) { dc(data); }
6936 
6937   // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)6938   void dc64(uint64_t data) { dc(data); }
6939 
  // Emit data in the instruction stream.
  // The raw value is copied into the code buffer as-is; no re-alignment is
  // performed afterwards (contrast EmitString, which re-aligns the buffer).
  template <typename T>
  void dc(T data) {
    // Raw data may only be emitted while the assembler is accepting code.
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }
6946 
6947   // Copy a string into the instruction stream, including the terminating NULL
6948   // character. The instruction pointer is then aligned correctly for
6949   // subsequent instructions.
EmitString(const char * string)6950   void EmitString(const char* string) {
6951     VIXL_ASSERT(string != NULL);
6952     VIXL_ASSERT(AllowAssembler());
6953 
6954     GetBuffer()->EmitString(string);
6955     GetBuffer()->Align();
6956   }
6957 
6958   // Code generation helpers.
6959   static bool OneInstrMoveImmediateHelper(Assembler* assm,
6960                                           const Register& dst,
6961                                           uint64_t imm);
6962 
6963   // Register encoding.
6964   template <int hibit, int lobit>
Rx(CPURegister rx)6965   static Instr Rx(CPURegister rx) {
6966     VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
6967     return ImmUnsignedField<hibit, lobit>(rx.GetCode());
6968   }
6969 
// Generate one encoder per register field name: Rd(), Rn(), Rm(), Ra(), Rt(),
// Rt2() and Rs(). Each places the register code into the correspondingly-named
// bit field, using the R<N>_offset/R<N>_width constants.
#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
#define REGISTER_ENCODER(N)                                           \
  static Instr R##N(CPURegister r##N) {                               \
    return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
  }
  CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
#undef REGISTER_ENCODER
#undef CPU_REGISTER_FIELD_NAMES
6978 
6979   static Instr RmNot31(CPURegister rm) {
6980     VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
6981     VIXL_ASSERT(!rm.IsZero());
6982     return Rm(rm);
6983   }
6984 
6985   // These encoding functions allow the stack pointer to be encoded, and
6986   // disallow the zero register.
RdSP(Register rd)6987   static Instr RdSP(Register rd) {
6988     VIXL_ASSERT(!rd.IsZero());
6989     return (rd.GetCode() & kRegCodeMask) << Rd_offset;
6990   }
6991 
RnSP(Register rn)6992   static Instr RnSP(Register rn) {
6993     VIXL_ASSERT(!rn.IsZero());
6994     return (rn.GetCode() & kRegCodeMask) << Rn_offset;
6995   }
6996 
RmSP(Register rm)6997   static Instr RmSP(Register rm) {
6998     VIXL_ASSERT(!rm.IsZero());
6999     return (rm.GetCode() & kRegCodeMask) << Rm_offset;
7000   }
7001 
  // Encode the destination predicate register field.
  static Instr Pd(PRegister pd) {
    return Rx<Pd_offset + Pd_width - 1, Pd_offset>(pd);
  }

  // Encode the second source predicate register field.
  static Instr Pm(PRegister pm) {
    return Rx<Pm_offset + Pm_width - 1, Pm_offset>(pm);
  }

  // Encode the first source predicate register field.
  static Instr Pn(PRegister pn) {
    return Rx<Pn_offset + Pn_width - 1, Pn_offset>(pn);
  }

  // Encode a governing predicate restricted to the low eight predicate
  // registers (3-bit Pg field).
  static Instr PgLow8(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<PgLow8_offset + PgLow8_width - 1, PgLow8_offset>(pg);
  }

  // Encode a governing predicate into an arbitrary bit range.
  template <int hibit, int lobit>
  static Instr Pg(PRegister pg) {
    // Governing predicates can be merging, zeroing, or unqualified. They should
    // never have a lane size.
    VIXL_ASSERT(!pg.HasLaneSize());
    return Rx<hibit, lobit>(pg);
  }
7028 
7029   // Flags encoding.
Flags(FlagsUpdate S)7030   static Instr Flags(FlagsUpdate S) {
7031     if (S == SetFlags) {
7032       return 1 << FlagsUpdate_offset;
7033     } else if (S == LeaveFlags) {
7034       return 0 << FlagsUpdate_offset;
7035     }
7036     VIXL_UNREACHABLE();
7037     return 0;
7038   }
7039 
  // Encode a condition code in the standard 4-bit condition field.
  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // Generic immediate encoding.
  // Encode a signed immediate into bits [hibit:lobit]; the value must fit
  // in the field width as a two's-complement number.
  template <int hibit, int lobit>
  static Instr ImmField(int64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    int fieldsize = hibit - lobit + 1;
    VIXL_ASSERT(IsIntN(fieldsize, imm));
    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
  }

  // For unsigned immediate encoding.
  // TODO: Handle signed and unsigned immediate in satisfactory way.
  // Encode an unsigned immediate into bits [hibit:lobit].
  template <int hibit, int lobit>
  static Instr ImmUnsignedField(uint64_t imm) {
    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
    return static_cast<Instr>(imm << lobit);
  }
7061 
  // PC-relative address encoding.
  // Split a signed 21-bit offset into the immhi:immlo fields used by
  // ADR/ADRP-style encodings (immlo carries the low bits, immhi the rest).
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }
7070 
  // Branch encoding.
  // All branch offsets below are signed instruction-counts, truncated to the
  // field width after the range assert.

  // 26-bit offset for unconditional branches (B, BL).
  static Instr ImmUncondBranch(int64_t imm26) {
    VIXL_ASSERT(IsInt26(imm26));
    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
  }

  // 19-bit offset for conditional branches.
  static Instr ImmCondBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCondBranch_offset;
  }

  // 19-bit offset for compare-and-branch (CBZ, CBNZ).
  static Instr ImmCmpBranch(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
  }

  // 14-bit offset for test-and-branch (TBZ, TBNZ).
  static Instr ImmTestBranch(int64_t imm14) {
    VIXL_ASSERT(IsInt14(imm14));
    return TruncateToUint14(imm14) << ImmTestBranch_offset;
  }
7091 
  // Encode the tested bit position for TBZ/TBNZ. The 6-bit position is split
  // across two fields: bit 5 goes into the b5 field, bits [4:0] into b40.
  static Instr ImmTestBranchBit(unsigned bit_pos) {
    VIXL_ASSERT(IsUint6(bit_pos));
    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
    unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5);
    unsigned bit40 = bit_pos << ImmTestBranchBit40_offset;
    // Masking discards the bits that belong to the other field.
    bit5 &= ImmTestBranchBit5_mask;
    bit40 &= ImmTestBranchBit40_mask;
    return bit5 | bit40;
  }
7101 
  // Data Processing encoding.
  // Select the sf (size) bit from the width of the destination register.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }
7106 
ImmAddSub(int imm)7107   static Instr ImmAddSub(int imm) {
7108     VIXL_ASSERT(IsImmAddSub(imm));
7109     if (IsUint12(imm)) {  // No shift required.
7110       imm <<= ImmAddSub_offset;
7111     } else {
7112       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
7113     }
7114     return imm;
7115   }
7116 
  // Encode the 'imms' (set-bits) component of an SVE bitmask immediate.
  static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) {
    VIXL_ASSERT(IsUint6(imms));
    // For lanes narrower than a D register the top bits of imms are implied
    // by the lane size, so imms + 3 must still fit in six bits.
    VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3));
    USE(lane_size);
    return imms << SVEImmSetBits_offset;
  }

  // Encode the 'immr' (rotation) component of an SVE bitmask immediate.
  // The rotation must be expressible within the lane size.
  static Instr SVEImmRotate(unsigned immr, unsigned lane_size) {
    VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr));
    USE(lane_size);
    return immr << SVEImmRotate_offset;
  }

  // Encode the single N bit of an SVE bitmask immediate.
  static Instr SVEBitN(unsigned bitn) {
    VIXL_ASSERT(IsUint1(bitn));
    return bitn << SVEBitN_offset;
  }
7134 
  // Encode the SVE load/store 'dtype' field from the memory element size
  // (msize) and the register element size (esize), both log2 in bytes.
  // The two halves are placed at `dtype_h_lsb` and `dtype_l_lsb`, which
  // default to the contiguous-form positions (bits 23 and 21).
  static Instr SVEDtype(unsigned msize_in_bytes_log2,
                        unsigned esize_in_bytes_log2,
                        bool is_signed,
                        int dtype_h_lsb = 23,
                        int dtype_l_lsb = 21) {
    VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2);
    Instr dtype_h = msize_in_bytes_log2;
    Instr dtype_l = esize_in_bytes_log2;
    // Signed forms use the encodings where msize would be greater than esize.
    // XOR with 0b11 maps each size onto that mirrored encoding space.
    if (is_signed) {
      dtype_h = dtype_h ^ 0x3;
      dtype_l = dtype_l ^ 0x3;
    }
    VIXL_ASSERT(IsUint2(dtype_h));
    VIXL_ASSERT(IsUint2(dtype_l));
    // A valid signed encoding always ends up with dtype_h > dtype_l.
    VIXL_ASSERT((dtype_h > dtype_l) == is_signed);

    return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb);
  }

  // As SVEDtype(), but with the dtype halves split apart (low half at
  // bit 13) as used by some non-contiguous forms.
  static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2,
                             unsigned esize_in_bytes_log2,
                             bool is_signed) {
    return SVEDtype(msize_in_bytes_log2,
                    esize_in_bytes_log2,
                    is_signed,
                    23,
                    13);
  }
7165 
  // Encode the 'imms' field of bitfield and logical-immediate instructions.
  // Six bits are available for X-sized operations, five for W.
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }
7172 
ImmR(unsigned immr,unsigned reg_size)7173   static Instr ImmR(unsigned immr, unsigned reg_size) {
7174     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
7175                 ((reg_size == kWRegSize) && IsUint5(immr)));
7176     USE(reg_size);
7177     VIXL_ASSERT(IsUint6(immr));
7178     return immr << ImmR_offset;
7179   }
7180 
  // Encode the 'imms' component of a logical (bitmask) immediate.
  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    // For W operations the top bits of imms are implied by the element
    // size, so imms + 3 must still fit in six bits.
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  // Encode the 'immr' rotation component of a logical (bitmask) immediate.
  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  // Encode the 19-bit offset of a load-literal instruction.
  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  // Encode the N bit of a logical (bitmask) immediate. It must be zero for
  // W-sized operations.
  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }
7208 
  // Encode the shift type of a shifted-register data-processing operand.
  // Only LSL, LSR, ASR and ROR are representable in this field.
  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  // Encode the 6-bit shift amount of a shifted-register operand.
  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  // Encode the extend mode (UXTB ... SXTX) of an extended-register operand.
  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  // Encode the left-shift (0-4) applied with an extended-register operand.
  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  // Encode the 5-bit immediate of a conditional-compare instruction.
  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  // Encode an NZCV flags value, extracting the four flag bits from the
  // StatusFlags representation.
  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }
7234 
  // MemOperand offset encoding.
  // 12-bit unsigned (scaled) load/store offset.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  // 9-bit signed (unscaled) load/store offset.
  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  // 7-bit signed load/store-pair offset. `imm7` is a byte offset and must be
  // a multiple of the access size; it is scaled down before encoding.
  static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size_in_bytes_log2));
    int64_t scaled_imm7 = imm7 / (1 << access_size_in_bytes_log2);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

  // One-bit shift flag for register-offset loads and stores.
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  // 10-bit signed offset for PAC loads/stores. `imm10` is a byte offset that
  // must be a multiple of 8; the scaled value is split into a high sign bit
  // and a low 9-bit field.
  static Instr ImmLSPAC(int64_t imm10) {
    VIXL_ASSERT(IsMultiple(imm10, 1 << 3));
    int64_t scaled_imm10 = imm10 / (1 << 3);
    VIXL_ASSERT(IsInt10(scaled_imm10));
    uint32_t s_bit = (scaled_imm10 >> 9) & 1;
    return (s_bit << ImmLSPACHi_offset) |
           (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset);
  }
7266 
  // 5-bit prefetch operation specifier.
  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  // 16-bit immediate for exception-generating instructions.
  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  // 16-bit immediate for the permanently-undefined instruction (UDF).
  static Instr ImmUdf(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmUdf_offset;
  }

  // 16-bit system register specifier.
  static Instr ImmSystemRegister(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmSystemRegister_offset;
  }

  // 6-bit rotation for the RMIF flag-manipulation instruction.
  static Instr ImmRMIFRotation(int imm6) {
    VIXL_ASSERT(IsUint6(imm6));
    return imm6 << ImmRMIFRotation_offset;
  }

  // 7-bit hint number.
  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  // 4-bit CRm field of system instructions.
  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  // 4-bit CRn field of system instructions.
  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  // 14-bit combined operand for SYS-style instructions.
  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  // 3-bit op1 field of system instructions.
  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  // 3-bit op2 field of system instructions.
  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  // 2-bit barrier domain.
  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  // 2-bit barrier type.
  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  // 16-bit payload of a move-wide instruction.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  // 2-bit 'hw' shift selector of a move-wide instruction.
  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }
7342 
  // FP Immediates.
  // Encoders for floating point immediates; defined out of line.
  static Instr ImmFP16(Float16 imm);
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

  // FP register type.
  // Select the FP type field from a scalar FP register's width.
  static Instr FPType(VRegister fd) {
    VIXL_ASSERT(fd.IsScalar());
    switch (fd.GetSizeInBits()) {
      case 16:
        return FP16;
      case 32:
        return FP32;
      case 64:
        return FP64;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
  }

  // 6-bit scale factor for fixed-point <-> FP conversions.
  static Instr FPScale(unsigned scale) {
    VIXL_ASSERT(IsUint6(scale));
    return scale << FPScale_offset;
  }
7368 
7369   // Immediate field checking helpers.
7370   static bool IsImmAddSub(int64_t immediate);
7371   static bool IsImmConditionalCompare(int64_t immediate);
7372   static bool IsImmFP16(Float16 imm);
7373   static bool IsImmFP32(float imm);
7374   static bool IsImmFP64(double imm);
7375   static bool IsImmLogical(uint64_t value,
7376                            unsigned width,
7377                            unsigned* n = NULL,
7378                            unsigned* imm_s = NULL,
7379                            unsigned* imm_r = NULL);
7380   static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2);
7381   static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2);
7382   static bool IsImmLSUnscaled(int64_t offset);
7383   static bool IsImmMovn(uint64_t imm, unsigned reg_size);
7384   static bool IsImmMovz(uint64_t imm, unsigned reg_size);
7385 
7386   // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)7387   static Instr VFormat(VRegister vd) {
7388     if (vd.Is64Bits()) {
7389       switch (vd.GetLanes()) {
7390         case 2:
7391           return NEON_2S;
7392         case 4:
7393           return NEON_4H;
7394         case 8:
7395           return NEON_8B;
7396         default:
7397           return 0xffffffff;
7398       }
7399     } else {
7400       VIXL_ASSERT(vd.Is128Bits());
7401       switch (vd.GetLanes()) {
7402         case 2:
7403           return NEON_2D;
7404         case 4:
7405           return NEON_4S;
7406         case 8:
7407           return NEON_8H;
7408         case 16:
7409           return NEON_16B;
7410         default:
7411           return 0xffffffff;
7412       }
7413     }
7414   }
7415 
  // Instruction bits for vector format in floating point data processing
  // operations.
  // Dispatch on lane count first, then on total register size; the
  // unreachable fall-throughs return 0 to satisfy the compiler.
  static Instr FPFormat(VRegister vd) {
    switch (vd.GetLanes()) {
      case 1:
        // Floating point scalar formats.
        switch (vd.GetSizeInBits()) {
          case 16:
            return FP16;
          case 32:
            return FP32;
          case 64:
            return FP64;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 2:
        // Two lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_2S;
          case 128:
            return NEON_FP_2D;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 4:
        // Four lane floating point vector formats.
        switch (vd.GetSizeInBits()) {
          case 64:
            return NEON_FP_4H;
          case 128:
            return NEON_FP_4S;
          default:
            VIXL_UNREACHABLE();
        }
        break;
      case 8:
        // Eight lane floating point vector format.
        VIXL_ASSERT(vd.Is128Bits());
        return NEON_FP_8H;
      default:
        VIXL_UNREACHABLE();
        return 0;
    }
    VIXL_UNREACHABLE();
    return 0;
  }
7466 
  // Instruction bits for vector format in load and store operations.
  // Like VFormat(), but for the load/store encoding space, which also
  // supports the 1D arrangement. 0xffffffff signals an unsupported lane
  // count.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
7498 
  // Instruction bits for scalar format in data processing operations.
  // Map a single-lane register's element size to the scalar NEON format;
  // 0xffffffff signals an unsupported size.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }
7515 
  // Encode the lane size (B/H/S/D) of an SVE Z or P register in the size
  // field. 0xffffffff signals an unsupported lane size.
  template <typename T>
  static Instr SVESize(const T& rd) {
    VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister());
    VIXL_ASSERT(rd.HasLaneSize());
    switch (rd.GetLaneSizeInBytes()) {
      case 1:
        return SVE_B;
      case 2:
        return SVE_H;
      case 4:
        return SVE_S;
      case 8:
        return SVE_D;
      default:
        return 0xffffffff;
    }
  }
7533 
  // Encode the 5-bit SVE predicate-constraint pattern (POW2, VL1, ALL, ...).
  static Instr ImmSVEPredicateConstraint(int pattern) {
    VIXL_ASSERT(IsUint5(pattern));
    return (pattern << ImmSVEPredicateConstraint_offset) &
           ImmSVEPredicateConstraint_mask;
  }

  // Split a NEON by-element index into the H, L and M bits. `num_bits` is
  // the index width (1, 2 or 3 bits); unused low bits are encoded as zero.
  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
7560 
  // Encode the FCADD rotation: only 90 and 270 degrees are legal, encoded as
  // a single bit.
  static Instr ImmRotFcadd(int rot) {
    VIXL_ASSERT(rot == 90 || rot == 270);
    return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset);
  }

  // Encode the FCMLA (by element) rotation as rot / 90 in a 2-bit field.
  static Instr ImmRotFcmlaSca(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaSca_offset;
  }

  // Encode the FCMLA (vector) rotation as rot / 90 in a 2-bit field.
  static Instr ImmRotFcmlaVec(int rot) {
    VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270);
    return (rot / 90) << ImmRotFcmlaVec_offset;
  }

  // Encode the 4-bit byte-rotation index of the EXT instruction.
  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }
7580 
  // Build the imm5 lane specifier: a single set bit at position `s` marks
  // the lane size, with the lane index encoded in the bits above it.
  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }

  // Build the imm4 lane specifier: the lane index shifted by the lane size.
  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  // Split an 8-bit modified immediate into the abc (top three bits) and
  // defgh (low five bits) fields.
  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }

  // Encode the 4-bit cmode of a NEON modified-immediate instruction.
  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  // Encode the single op bit of a NEON modified-immediate instruction.
  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }
7612 
  // Size of the code generated since label to the current position.
  // The label must already be bound; the result is a byte count.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
  // Deprecated alias retained for source compatibility.
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }
7622 
  // Deprecated buffer accessors; call the buffer methods directly instead.
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  // Return the configured position-independent-code option.
  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }
7644 
  // Access the set of CPU features this assembler is allowed to use.
  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }

  // Replace the allowed CPU feature set.
  void SetCPUFeatures(const CPUFeatures& cpu_features) {
    cpu_features_ = cpu_features;
  }

  // True unless the PIC mode forbids code that depends on the page offset
  // of its own load address.
  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }
7655 
AppropriateZeroRegFor(const CPURegister & reg)7656   static Register AppropriateZeroRegFor(const CPURegister& reg) {
7657     return reg.Is64Bits() ? Register(xzr) : Register(wzr);
7658   }
7659 
7660  protected:
7661   void LoadStore(const CPURegister& rt,
7662                  const MemOperand& addr,
7663                  LoadStoreOp op,
7664                  LoadStoreScalingOption option = PreferScaledOffset);
7665 
7666   void LoadStorePAC(const Register& xt,
7667                     const MemOperand& addr,
7668                     LoadStorePACOp op);
7669 
7670   void LoadStorePair(const CPURegister& rt,
7671                      const CPURegister& rt2,
7672                      const MemOperand& addr,
7673                      LoadStorePairOp op);
7674   void LoadStoreStruct(const VRegister& vt,
7675                        const MemOperand& addr,
7676                        NEONLoadStoreMultiStructOp op);
7677   void LoadStoreStruct1(const VRegister& vt,
7678                         int reg_count,
7679                         const MemOperand& addr);
7680   void LoadStoreStructSingle(const VRegister& vt,
7681                              uint32_t lane,
7682                              const MemOperand& addr,
7683                              NEONLoadStoreSingleStructOp op);
7684   void LoadStoreStructSingleAllLanes(const VRegister& vt,
7685                                      const MemOperand& addr,
7686                                      NEONLoadStoreSingleStructOp op);
7687   void LoadStoreStructVerify(const VRegister& vt,
7688                              const MemOperand& addr,
7689                              Instr op);
7690 
7691   // Set `is_load` to false in default as it's only used in the
7692   // scalar-plus-vector form.
7693   Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2,
7694                             int num_regs,
7695                             const SVEMemOperand& addr,
7696                             bool is_load = false);
7697 
7698   // E.g. st1b, st1h, ...
7699   // This supports both contiguous and scatter stores.
7700   void SVESt1Helper(unsigned msize_in_bytes_log2,
7701                     const ZRegister& zt,
7702                     const PRegister& pg,
7703                     const SVEMemOperand& addr);
7704 
7705   // E.g. ld1b, ld1h, ...
7706   // This supports both contiguous and gather loads.
7707   void SVELd1Helper(unsigned msize_in_bytes_log2,
7708                     const ZRegister& zt,
7709                     const PRegisterZ& pg,
7710                     const SVEMemOperand& addr,
7711                     bool is_signed);
7712 
7713   // E.g. ld1rb, ld1rh, ...
7714   void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2,
7715                              const ZRegister& zt,
7716                              const PRegisterZ& pg,
7717                              const SVEMemOperand& addr,
7718                              bool is_signed);
7719 
7720   // E.g. ldff1b, ldff1h, ...
7721   // This supports both contiguous and gather loads.
7722   void SVELdff1Helper(unsigned msize_in_bytes_log2,
7723                       const ZRegister& zt,
7724                       const PRegisterZ& pg,
7725                       const SVEMemOperand& addr,
7726                       bool is_signed);
7727 
7728   // Common code for the helpers above.
7729   void SVELdSt1Helper(unsigned msize_in_bytes_log2,
7730                       const ZRegister& zt,
7731                       const PRegister& pg,
7732                       const SVEMemOperand& addr,
7733                       bool is_signed,
7734                       Instr op);
7735 
7736   // Common code for the helpers above.
7737   void SVEScatterGatherHelper(unsigned msize_in_bytes_log2,
7738                               const ZRegister& zt,
7739                               const PRegister& pg,
7740                               const SVEMemOperand& addr,
7741                               bool is_load,
7742                               bool is_signed,
7743                               bool is_first_fault);
7744 
7745   // E.g. st2b, st3h, ...
7746   void SVESt234Helper(int num_regs,
7747                       const ZRegister& zt1,
7748                       const PRegister& pg,
7749                       const SVEMemOperand& addr);
7750 
7751   // E.g. ld2b, ld3h, ...
7752   void SVELd234Helper(int num_regs,
7753                       const ZRegister& zt1,
7754                       const PRegisterZ& pg,
7755                       const SVEMemOperand& addr);
7756 
7757   // Common code for the helpers above.
7758   void SVELdSt234Helper(int num_regs,
7759                         const ZRegister& zt1,
7760                         const PRegister& pg,
7761                         const SVEMemOperand& addr,
7762                         Instr op);
7763 
7764   // E.g. ld1qb, ld1qh, ldnt1b, ...
7765   void SVELd1St1ScaImmHelper(const ZRegister& zt,
7766                              const PRegister& pg,
7767                              const SVEMemOperand& addr,
7768                              Instr regoffset_op,
7769                              Instr immoffset_op,
7770                              int imm_divisor = 1);
7771 
7772   void SVELd1VecScaHelper(const ZRegister& zt,
7773                           const PRegister& pg,
7774                           const SVEMemOperand& addr,
7775                           uint32_t msize,
7776                           bool is_signed);
7777   void SVESt1VecScaHelper(const ZRegister& zt,
7778                           const PRegister& pg,
7779                           const SVEMemOperand& addr,
7780                           uint32_t msize);
7781 
7782   void Prefetch(PrefetchOperation op,
7783                 const MemOperand& addr,
7784                 LoadStoreScalingOption option = PreferScaledOffset);
7785   void Prefetch(int op,
7786                 const MemOperand& addr,
7787                 LoadStoreScalingOption option = PreferScaledOffset);
7788 
7789   // TODO(all): The third parameter should be passed by reference but gcc 4.8.2
7790   // reports a bogus uninitialised warning then.
7791   void Logical(const Register& rd,
7792                const Register& rn,
7793                const Operand operand,
7794                LogicalOp op);
7795 
7796   void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op);
7797 
7798   void LogicalImmediate(const Register& rd,
7799                         const Register& rn,
7800                         unsigned n,
7801                         unsigned imm_s,
7802                         unsigned imm_r,
7803                         LogicalOp op);
7804 
7805   void ConditionalCompare(const Register& rn,
7806                           const Operand& operand,
7807                           StatusFlags nzcv,
7808                           Condition cond,
7809                           ConditionalCompareOp op);
7810 
7811   void AddSubWithCarry(const Register& rd,
7812                        const Register& rn,
7813                        const Operand& operand,
7814                        FlagsUpdate S,
7815                        AddSubWithCarryOp op);
7816 
7817   void CompareVectors(const PRegisterWithLaneSize& pd,
7818                       const PRegisterZ& pg,
7819                       const ZRegister& zn,
7820                       const ZRegister& zm,
7821                       SVEIntCompareVectorsOp op);
7822 
7823   void CompareVectors(const PRegisterWithLaneSize& pd,
7824                       const PRegisterZ& pg,
7825                       const ZRegister& zn,
7826                       int imm,
7827                       SVEIntCompareSignedImmOp op);
7828 
7829   void CompareVectors(const PRegisterWithLaneSize& pd,
7830                       const PRegisterZ& pg,
7831                       const ZRegister& zn,
7832                       unsigned imm,
7833                       SVEIntCompareUnsignedImmOp op);
7834 
  // Encode an unpredicated SVE add/subtract of a (possibly shifted) 8-bit
  // immediate.
  void SVEIntAddSubtractImmUnpredicatedHelper(
      SVEIntAddSubtractImm_UnpredicatedOp op,
      const ZRegister& zd,
      int imm8,
      int shift);

  // Encode an SVE element-count operation targeting a general-purpose
  // register, scaled by multiplier for the given predicate pattern.
  void SVEElementCountToRegisterHelper(Instr op,
                                       const Register& rd,
                                       int pattern,
                                       int multiplier);

  // Produce the encoded immediate field for an SVE shift-left by `shift` of
  // lanes of the given size.
  Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits);

  // As above, for shift-right.
  Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits);

  // Encode an unpredicated SVE shift-by-immediate; encoded_imm must already
  // be in instruction-field form (see the Encode* helpers above).
  void SVEBitwiseShiftImmediate(const ZRegister& zd,
                                const ZRegister& zn,
                                Instr encoded_imm,
                                Instr op);

  // Predicated (merging) variant of the above, operating on zdn in place.
  void SVEBitwiseShiftImmediatePred(const ZRegister& zdn,
                                    const PRegisterM& pg,
                                    Instr encoded_imm,
                                    Instr op);

  // Build the zm/index fields for an SVE multiply-by-indexed-element
  // instruction, choosing between the H/S/D opcodes by lane size.
  Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2,
                          const ZRegister& zm,
                          int index,
                          Instr op_h,
                          Instr op_s,
                          Instr op_d);

  // As above, for the long (widening) multiply-by-element forms.
  Instr SVEMulLongIndexHelper(const ZRegister& zm, int index);

  // As above, for the complex multiply-by-element forms.
  Instr SVEMulComplexIndexHelper(const ZRegister& zm, int index);
7870 
  // Encode an SVE contiguous prefetch with a scalar-plus-scalar address.
  // prefetch_size gives the access granularity the encoding should use.
  void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  // As above, for scalar-plus-vector addressing.
  void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop,
                                                   const PRegister& pg,
                                                   const SVEMemOperand& addr,
                                                   int prefetch_size);

  // Encode an SVE gather prefetch with vector-plus-immediate addressing.
  void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  // Encode an SVE gather prefetch with scalar-plus-immediate addressing.
  void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop,
                                                  const PRegister& pg,
                                                  const SVEMemOperand& addr,
                                                  int prefetch_size);

  // Dispatching entry point for the SVE prefetch helpers above.
  void SVEPrefetchHelper(PrefetchOperation prfop,
                         const PRegister& pg,
                         const SVEMemOperand& addr,
                         int prefetch_size);
7895 
SVEImmPrefetchOperation(PrefetchOperation prfop)7896   static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) {
7897     // SVE only supports PLD and PST, not PLI.
7898     VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) ||
7899                 ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM)));
7900     // Check that we can simply map bits.
7901     VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000);
7902     VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000);
7903     // Remaining operations map directly.
7904     return ((prfop & 0b10000) >> 1) | (prfop & 0b00111);
7905   }
7906 
  // Functions for emulating operands not directly supported by the instruction
  // set.
  //
  // Emit code applying the given shift by `amount` to rn, with the result in
  // rd.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  // Emit code applying the given extend (and optional left shift) to rn.
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  // Common implementation for add/subtract with a generic operand; S selects
  // whether the flags are updated.
  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);
7923 
  // Encode a NEON table-lookup instruction; op selects the exact variant.
  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  // Non-temporal counterparts of the pair lookups above.
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  // Find the load-literal opcode matching the type of rt.
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
7943 
  // Convenience pass-through for CPU feature checks: forwards directly to
  // cpu_features_.Has(). Unused trailing features default to kNone.
  bool CPUHas(CPUFeatures::Feature feature0,
              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
    return cpu_features_.Has(feature0, feature1, feature2, feature3);
  }

  // Determine whether the target CPU has the specified registers, based on the
  // currently-enabled CPU features. Presence of a register does not imply
  // support for arbitrary operations on it. For example, CPUs with FP have H
  // registers, but most half-precision operations require the FPHalf feature.
  //
  // These are used to check CPU features in loads and stores that have the same
  // entry point for both integer and FP registers.
  bool CPUHas(const CPURegister& rt) const;
  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;

  // As above, but checking the features required to access a system register.
  bool CPUHas(SystemRegister sysreg) const;
7963 
 private:
  // Convert a floating-point value to the 8-bit immediate field used by
  // floating-point move-immediate encodings. The value is assumed to be
  // representable in that form -- TODO confirm callers check this first.
  static uint32_t FP16ToImm8(Float16 imm);
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);
7968 
  // Instruction helpers.
  //
  // Encode a move-wide-immediate instruction; mov_op selects the variant.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  // Encode a data-processing instruction whose operand is a shifted register;
  // S selects whether the flags are updated.
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  // As above, for the extended-register operand form.
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  // Encode a non-temporal load/store pair access.
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  // Encode a load-literal instruction; imm is the encoded offset field.
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  // Encode a conditional-select instruction; op selects the variant.
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  // Encode a one-source data-processing instruction.
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  // Encode a three-source data-processing instruction (e.g. multiply-add
  // style forms taking rn, rm and the addend ra).
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  // Encode a one-source floating-point data-processing instruction.
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  // Encode a three-source floating-point data-processing instruction.
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // Encode a NEON across-lanes instruction with a long (widening) result.
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  // Encode a NEON across-lanes instruction; op_half is the encoding used for
  // the half-precision form.
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op,
                       Instr op_half);
  // Encode a NEON modified-immediate instruction with an LSL-style shift.
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  // As above, with an MSL-style shift.
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  // Encode a NEON floating-point two-register same-type instruction.
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  // Encode a NEON three-register same-type instruction.
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  // Half-precision form of the three-register same-type encoding.
  void NEON3SameFP16(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     Instr op);
  // Floating-point form of the three-register same-type encoding.
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  // Encode NEON three-register different-type instructions: long (widening
  // result), wide (one wide source), and high-narrow variants respectively.
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  // Encode a NEON floating-point two-register miscellaneous instruction;
  // `value` is an immediate used by some forms (e.g. compare-against-value).
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  // Half-precision form of the above.
  void NEONFP2RegMiscFP16(const VRegister& vd,
                          const VRegister& vn,
                          NEON2RegMiscFP16Op vop,
                          double value = 0.0);
  // Integer form of the two-register miscellaneous encoding.
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  // Raw-opcode forms of the floating-point two-register miscellaneous
  // encodings.
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
  // Encode a NEON add-long-pairwise style instruction.
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  // Encode a NEON permute instruction.
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  // Encode a NEON floating-point by-indexed-element instruction; op_half is
  // the encoding used for the half-precision form.
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op,
                       NEONByIndexedElementOp op_half);
  // Integer form of the by-indexed-element encoding.
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  // Long (widening-result) form of the by-indexed-element encoding.
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  // Encode a NEON shift-by-immediate instruction from a pre-computed
  // immh:immb field.
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  // Encode a NEON left shift by `shift`, computing the immediate field.
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  // As above, for right shifts.
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  // Long (widening) and narrowing forms of the shift-by-immediate encoding.
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  // Encode a NEON extract-narrow style instruction.
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
8106 
  // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8)
  // and *shift is either 0 or 8. Otherwise, leave the values unchanged.
  void ResolveSVEImm8Shift(int* imm8, int* shift);

  // Build the addressing-mode field for load/store-structure style accesses
  // from the given MemOperand.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size_in_bytes_log2,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label, then
  // return an offset to be encoded in the instruction. If the label is not yet
  // bound, an offset of 0 is returned. The three variants return byte,
  // instruction and page granularity offsets respectively.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  // element_shift presumably scales the byte offset to the required
  // granularity -- see the three wrappers above.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);

  // Literal load offset are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);
8132 
8133   // Emit the instruction in buffer_.
Emit(Instr instruction)8134   void Emit(Instr instruction) {
8135     VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
8136     VIXL_ASSERT(AllowAssembler());
8137     GetBuffer()->Emit32(instruction);
8138   }
8139 
  // Position-independent-code generation policy (see
  // PositionIndependentCodeOption).
  PositionIndependentCodeOption pic_;

  // CPU features the generated code may rely on; queried by the CPUHas()
  // helpers above.
  CPUFeatures cpu_features_;
};
8144 
8145 
8146 template <typename T>
UpdateValue(T new_value,const Assembler * assembler)8147 void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
8148   return UpdateValue(new_value,
8149                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8150 }
8151 
8152 
8153 template <typename T>
UpdateValue(T high64,T low64,const Assembler * assembler)8154 void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
8155   return UpdateValue(high64,
8156                      low64,
8157                      assembler->GetBuffer().GetStartAddress<uint8_t*>());
8158 }
8159 
8160 
8161 }  // namespace aarch64
8162 
8163 // Required InvalSet template specialisations.
8164 // TODO: These template specialisations should not live in this file.  Move
8165 // Label out of the aarch64 namespace in order to share its implementation
8166 // later.
8167 #define INVAL_SET_TEMPLATE_PARAMETERS                                \
8168   ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
8169       aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
8170       aarch64::Label::kReclaimFactor
// For Label link sets the stored ptrdiff_t offset is its own key.
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
// Since the element is its own key, setting the key overwrites the element.
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
8181 #undef INVAL_SET_TEMPLATE_PARAMETERS
8182 
8183 }  // namespace vixl
8184 
8185 #endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_
8186