// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_
#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_

#include "../assembler-base-vixl.h"
#include "../code-generation-scopes-vixl.h"
#include "../globals-vixl.h"
#include "../invalset-vixl.h"
#include "../utils-vixl.h"

#include "operands-aarch64.h"

namespace vixl {
namespace aarch64 {

class LabelTestHelper;  // Forward declaration.


class Label {
 public:
  Label() : location_(kLocationUnbound) {}
  ~Label() {
    // All links to a label must have been resolved before it is destructed.
    VIXL_ASSERT(!IsLinked());
  }

  bool IsBound() const { return location_ >= 0; }
  bool IsLinked() const { return !links_.empty(); }

  ptrdiff_t GetLocation() const { return location_; }
  VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) {
    return GetLocation();
  }

  static const int kNPreallocatedLinks = 4;
  static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX;
  static const size_t kReclaimFrom = 512;
  static const size_t kReclaimFactor = 2;

  typedef InvalSet<ptrdiff_t,
                   kNPreallocatedLinks,
                   ptrdiff_t,
                   kInvalidLinkKey,
                   kReclaimFrom,
                   kReclaimFactor> LinksSetBase;
  typedef InvalSetIterator<LinksSetBase> LabelLinksIteratorBase;

 private:
  class LinksSet : public LinksSetBase {
   public:
    LinksSet() : LinksSetBase() {}
  };

  // Allows iterating over the links of a label. The behaviour is undefined if
  // the list of links is modified in any way while iterating.
  class LabelLinksIterator : public LabelLinksIteratorBase {
   public:
    explicit LabelLinksIterator(Label* label)
        : LabelLinksIteratorBase(&label->links_) {}

    // TODO: Remove these and use the STL-like interface instead.
    using LabelLinksIteratorBase::Advance;
    using LabelLinksIteratorBase::Current;
  };

  void Bind(ptrdiff_t location) {
    // Labels can only be bound once.
    VIXL_ASSERT(!IsBound());
    location_ = location;
  }

  void AddLink(ptrdiff_t instruction) {
    // If a label is bound, the assembler already has the information it needs
    // to write the instruction, so there is no need to add it to links_.
    VIXL_ASSERT(!IsBound());
    links_.insert(instruction);
  }

  void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); }

  void ClearAllLinks() { links_.clear(); }

  // TODO: The comment below considers average case complexity for our
  // usual use-cases. The elements of interest are:
  // - Branches to a label are emitted in order: branch instructions to a label
  // are generated at an offset in the code generation buffer greater than any
  // other branch to that same label already generated. As an example, this can
  // be broken when an instruction is patched to become a branch. Note that the
  // code will still work, but the complexity considerations below may locally
  // not apply any more.
  // - Veneers are generated in order: for multiple branches of the same type
  // branching to the same unbound label going out of range, veneers are
  // generated in growing order of the branch instruction offset from the start
  // of the buffer.
  //
  // When creating a veneer for a branch going out of range, the link for this
  // branch needs to be removed from this `links_`. Since all branches are
  // tracked in one underlying InvalSet, the complexity for this deletion is the
  // same as for finding the element, i.e. O(n), where n is the number of links
  // in the set.
  // This could be reduced to O(1) by using the same trick as used when tracking
  // branch information for veneers: split the container to use one set per type
  // of branch. With that setup, when a veneer is created and the link needs to
  // be deleted, if the two points above hold, it must be the minimum element of
  // the set for its type of branch, and that minimum element will be accessible
  // in O(1).

  // The offsets of the instructions that have linked to this label.
  LinksSet links_;
  // The label location.
  ptrdiff_t location_;

  static const ptrdiff_t kLocationUnbound = -1;

// It is not safe to copy labels, so disable the copy constructor and operator
// by declaring them private (without an implementation).
#if __cplusplus >= 201103L
  Label(const Label&) = delete;
  void operator=(const Label&) = delete;
#else
  Label(const Label&);
  void operator=(const Label&);
#endif

  // The Assembler class is responsible for binding and linking labels, since
  // the stored offsets need to be consistent with the Assembler's buffer.
  friend class Assembler;
  // The MacroAssembler and VeneerPool handle resolution of branches to distant
  // targets.
  friend class MacroAssembler;
  friend class VeneerPool;
};
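
// A minimal usage sketch (illustrative, not part of the original header):
// labels are driven through the Assembler interface declared below; the
// buffer capacity and register choices here are assumptions for the example.
//
//   Assembler masm(4096);
//   Label loop;
//   masm.bind(&loop);              // Bind the label to the current PC.
//   masm.sub(x0, x0, Operand(1));  // Decrement the counter.
//   masm.cbnz(x0, &loop);          // Loop while x0 != 0.
//   masm.ret();
//   masm.FinalizeCode();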


class Assembler;
class LiteralPool;

// A literal is a 32-bit, 64-bit or 128-bit piece of data stored in the
// instruction stream and loaded through a PC-relative load. The same literal
// can be referred to by multiple instructions but a literal can only reside
// at one place in memory. A literal can be used by a load before or after
// being placed in memory.
//
// Internally an offset of 0 is associated with a literal which has been
// neither used nor placed. Then two possibilities arise:
//  1) the literal is placed, and the offset (stored as offset + 1) is used to
//     resolve any subsequent load using the literal.
//  2) the literal is not placed and offset is the offset of the last load
//     using the literal (stored as -offset - 1). If multiple loads refer to
//     this literal then the last load holds the offset of the preceding load
//     and all loads form a chain. Once the literal is placed all the loads in
//     the chain are resolved and future loads fall back to possibility 1.
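//
// As a worked illustration (offsets assumed for the example): a load emitted
// at buffer offset 8 that refers to a literal not yet placed records
// offset_ = -8 - 1 = -9, so IsUsed() holds. When the literal is later placed
// at offset 16, offset_ becomes 16 + 1 = 17, so IsPlaced() holds and
// GetOffset() returns 16.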
class RawLiteral {
 public:
  enum DeletionPolicy {
    kDeletedOnPlacementByPool,
    kDeletedOnPoolDestruction,
    kManuallyDeleted
  };

  RawLiteral(size_t size,
             LiteralPool* literal_pool,
             DeletionPolicy deletion_policy = kManuallyDeleted);

  // The literal pool only sees and deletes `RawLiteral*` pointers, but they are
  // actually pointing to `Literal<T>` objects.
  virtual ~RawLiteral() {}

  size_t GetSize() const {
    VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes);
    VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes);
    VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) ||
                (size_ == kQRegSizeInBytes));
    return size_;
  }
  VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); }

  uint64_t GetRawValue128Low64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) {
    return GetRawValue128Low64();
  }

  uint64_t GetRawValue128High64() const {
    VIXL_ASSERT(size_ == kQRegSizeInBytes);
    return high64_;
  }
  VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) {
    return GetRawValue128High64();
  }

  uint64_t GetRawValue64() const {
    VIXL_ASSERT(size_ == kXRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    return low64_;
  }
  VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) {
    return GetRawValue64();
  }

  uint32_t GetRawValue32() const {
    VIXL_ASSERT(size_ == kWRegSizeInBytes);
    VIXL_ASSERT(high64_ == 0);
    VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_));
    return static_cast<uint32_t>(low64_);
  }
  VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) {
    return GetRawValue32();
  }

  bool IsUsed() const { return offset_ < 0; }
  bool IsPlaced() const { return offset_ > 0; }

  LiteralPool* GetLiteralPool() const { return literal_pool_; }

  ptrdiff_t GetOffset() const {
    VIXL_ASSERT(IsPlaced());
    return offset_ - 1;
  }
  VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); }

 protected:
  void SetOffset(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = offset + 1;
  }
  VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) {
    SetOffset(offset);
  }

  ptrdiff_t GetLastUse() const {
    VIXL_ASSERT(IsUsed());
    return -offset_ - 1;
  }
  VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); }

  void SetLastUse(ptrdiff_t offset) {
    VIXL_ASSERT(offset >= 0);
    VIXL_ASSERT(IsWordAligned(offset));
    VIXL_ASSERT(!IsPlaced());
    offset_ = -offset - 1;
  }
  VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) {
    SetLastUse(offset);
  }

  size_t size_;
  ptrdiff_t offset_;
  uint64_t low64_;
  uint64_t high64_;

 private:
  LiteralPool* literal_pool_;
  DeletionPolicy deletion_policy_;

  friend class Assembler;
  friend class LiteralPool;
};


template <typename T>
class Literal : public RawLiteral {
 public:
  explicit Literal(T value,
                   LiteralPool* literal_pool = NULL,
                   RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(sizeof(value), literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes);
    UpdateValue(value);
  }

  Literal(T high64,
          T low64,
          LiteralPool* literal_pool = NULL,
          RawLiteral::DeletionPolicy ownership = kManuallyDeleted)
      : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) {
    VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2));
    UpdateValue(high64, low64);
  }

  virtual ~Literal() {}

  // Update the value of this literal, if necessary by rewriting the value in
  // the pool.
  // If the literal has already been placed in a literal pool, the address of
  // the start of the code buffer must be provided, as the literal only knows
  // its offset from there. This also allows patching the value after the code
  // has been moved in memory.
  void UpdateValue(T new_value, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(new_value) == size_);
    memcpy(&low64_, &new_value, sizeof(new_value));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) {
    VIXL_ASSERT(sizeof(low64) == size_ / 2);
    memcpy(&low64_, &low64, sizeof(low64));
    memcpy(&high64_, &high64, sizeof(high64));
    if (IsPlaced()) {
      VIXL_ASSERT(code_buffer != NULL);
      RewriteValueInCode(code_buffer);
    }
  }

  void UpdateValue(T new_value, const Assembler* assembler);
  void UpdateValue(T high64, T low64, const Assembler* assembler);

 private:
  void RewriteValueInCode(uint8_t* code_buffer) {
    VIXL_ASSERT(IsPlaced());
    VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes);
    switch (GetSize()) {
      case kSRegSizeInBytes:
        *reinterpret_cast<uint32_t*>(code_buffer + GetOffset()) =
            GetRawValue32();
        break;
      case kDRegSizeInBytes:
        *reinterpret_cast<uint64_t*>(code_buffer + GetOffset()) =
            GetRawValue64();
        break;
      default:
        VIXL_ASSERT(GetSize() == kQRegSizeInBytes);
        uint64_t* base_address =
            reinterpret_cast<uint64_t*>(code_buffer + GetOffset());
        *base_address = GetRawValue128Low64();
        *(base_address + 1) = GetRawValue128High64();
    }
  }
};
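
// A minimal usage sketch (illustrative, not part of the original header):
// load a 64-bit constant through a PC-relative literal. Manual placement via
// Assembler::place() is shown; client code more commonly lets a LiteralPool
// manage placement.
//
//   Assembler masm(4096);
//   Literal<uint64_t> magic(UINT64_C(0x1234567890abcdef));
//   masm.ldr(x0, &magic);  // Use the literal; it is now "used" but unplaced.
//   masm.ret();
//   masm.place(&magic);    // Place the literal data; the load is resolved.
//   masm.FinalizeCode();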


// Control whether or not position-independent code should be emitted.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};


// Control how scaled- and unscaled-offset loads and stores are generated.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
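
// As an illustration (offsets assumed; `masm` is an Assembler): a 64-bit load
// at immediate offset 8 has both a scaled and an unscaled encoding, so the
// Prefer* options only express a preference, while an offset such as 1 cannot
// be scaled and forces the unscaled (ldur) form:
//
//   masm.ldr(x0, MemOperand(x1, 8), PreferScaledOffset);     // ldr x0, [x1, #8]
//   masm.ldr(x0, MemOperand(x1, 1), PreferScaledOffset);     // ldur x0, [x1, #1]
//   masm.ldr(x0, MemOperand(x1, 8), RequireUnscaledOffset);  // ldur x0, [x1, #8]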


// Assembler.
class Assembler : public vixl::internal::AssemblerBase {
 public:
  explicit Assembler(
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : pic_(pic) {}
  explicit Assembler(
      size_t capacity,
      PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(capacity), pic_(pic) {}
  Assembler(byte* buffer,
            size_t capacity,
            PositionIndependentCodeOption pic = PositionIndependentCode)
      : AssemblerBase(buffer, capacity), pic_(pic) {}

  // Upon destruction, the code will assert that one of the following is true:
  //  * The Assembler object has not been used.
  //  * Nothing has been emitted since the last Reset() call.
  //  * Nothing has been emitted since the last FinalizeCode() call.
  ~Assembler() {}
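
  // A minimal lifecycle sketch (illustrative, not part of the original
  // header; the capacity and pic option are assumptions for the example):
  //
  //   Assembler masm(4096, PageOffsetDependentCode);
  //   // ... emit instructions ...
  //   masm.FinalizeCode();  // Or Reset(); otherwise the destructor asserts
  //                         // if anything was emitted.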

  // System functions.

  // Start generating code from the beginning of the buffer, discarding any code
  // and data that has already been emitted into the buffer.
  void Reset();

  // Label.
  // Bind a label to the current PC.
  void bind(Label* label);

  // Bind a label to a specified offset from the start of the buffer.
  void BindToOffset(Label* label, ptrdiff_t offset);

  // Place a literal at the current PC.
  void place(RawLiteral* literal);

  VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) {
    return GetCursorOffset();
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t GetBufferEndOffset() const) {
    return static_cast<ptrdiff_t>(GetBuffer().GetCapacity());
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  ptrdiff_t BufferEndOffset() const) {
    return GetBuffer().GetCapacity();
  }

  // Return the address of a bound label.
  template <typename T>
  T GetLabelAddress(const Label* label) const {
    VIXL_ASSERT(label->IsBound());
    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
    return GetBuffer().GetOffsetAddress<T>(label->GetLocation());
  }

  Instruction* GetInstructionAt(ptrdiff_t instruction_offset) {
    return GetBuffer()->GetOffsetAddress<Instruction*>(instruction_offset);
  }
  VIXL_DEPRECATED("GetInstructionAt",
                  Instruction* InstructionAt(ptrdiff_t instruction_offset)) {
    return GetInstructionAt(instruction_offset);
  }

  ptrdiff_t GetInstructionOffset(Instruction* instruction) {
    VIXL_STATIC_ASSERT(sizeof(*instruction) == 1);
    ptrdiff_t offset =
        instruction - GetBuffer()->GetStartAddress<Instruction*>();
    VIXL_ASSERT((0 <= offset) &&
                (offset < static_cast<ptrdiff_t>(GetBuffer()->GetCapacity())));
    return offset;
  }
  VIXL_DEPRECATED("GetInstructionOffset",
                  ptrdiff_t InstructionOffset(Instruction* instruction)) {
    return GetInstructionOffset(instruction);
  }

  // Instruction set functions.

  // Branch / Jump instructions.
  // Branch to register.
  void br(const Register& xn);

  // Branch with link to register.
  void blr(const Register& xn);

  // Branch to register with return hint.
  void ret(const Register& xn = lr);

  // Unconditional branch to label.
  void b(Label* label);

  // Conditional branch to label.
  void b(Label* label, Condition cond);

  // Unconditional branch to PC offset.
  void b(int64_t imm26);

  // Conditional branch to PC offset.
  void b(int64_t imm19, Condition cond);

  // Branch with link to label.
  void bl(Label* label);

  // Branch with link to PC offset.
  void bl(int64_t imm26);

  // Compare and branch to label if zero.
  void cbz(const Register& rt, Label* label);

  // Compare and branch to PC offset if zero.
  void cbz(const Register& rt, int64_t imm19);

  // Compare and branch to label if not zero.
  void cbnz(const Register& rt, Label* label);

  // Compare and branch to PC offset if not zero.
  void cbnz(const Register& rt, int64_t imm19);

  // Table lookup from one register.
  void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup from two registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup from three registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup from four registers.
  void tbl(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Table lookup extension from one register.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Table lookup extension from two registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vm);

  // Table lookup extension from three registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vn2,
           const VRegister& vn3,
           const VRegister& vn4,
           const VRegister& vm);

  // Test bit and branch to label if zero.
  void tbz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if zero.
  void tbz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Test bit and branch to label if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, Label* label);

  // Test bit and branch to PC offset if not zero.
  void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14);

  // Address calculation instructions.
  // Calculate a PC-relative address. Unlike for branches, the offset in adr is
  // unscaled (i.e. the result can be unaligned).

  // Calculate the address of a label.
  void adr(const Register& xd, Label* label);

  // Calculate the address of a PC offset.
  void adr(const Register& xd, int64_t imm21);

  // Calculate the page address of a label.
  void adrp(const Register& xd, Label* label);

  // Calculate the page address of a PC offset.
  void adrp(const Register& xd, int64_t imm21);
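
  // An illustrative sketch (not part of the original header; registers and
  // ranges below are assumptions): adr reaches +/-1MB from the PC, so nearby
  // buffer-local data can be addressed position-independently:
  //
  //   Label data;
  //   masm.adr(x0, &data);  // x0 = absolute address of `data` at run time.
  //
  // For targets further away, adrp computes the 4KB page address (+/-4GB
  // range) and is typically paired with an add of the low 12 bits.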

  // Data Processing instructions.
  // Add.
  void add(const Register& rd, const Register& rn, const Operand& operand);

  // Add and update status flags.
  void adds(const Register& rd, const Register& rn, const Operand& operand);

  // Compare negative.
  void cmn(const Register& rn, const Operand& operand);

  // Subtract.
  void sub(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract and update status flags.
  void subs(const Register& rd, const Register& rn, const Operand& operand);

  // Compare.
  void cmp(const Register& rn, const Operand& operand);

  // Negate.
  void neg(const Register& rd, const Operand& operand);

  // Negate and update status flags.
  void negs(const Register& rd, const Operand& operand);

  // Add with carry bit.
  void adc(const Register& rd, const Register& rn, const Operand& operand);

  // Add with carry bit and update status flags.
  void adcs(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit.
  void sbc(const Register& rd, const Register& rn, const Operand& operand);

  // Subtract with carry bit and update status flags.
  void sbcs(const Register& rd, const Register& rn, const Operand& operand);

  // Negate with carry bit.
  void ngc(const Register& rd, const Operand& operand);

  // Negate with carry bit and update status flags.
  void ngcs(const Register& rd, const Operand& operand);

  // Logical instructions.
  // Bitwise and (A & B).
  void and_(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise and (A & B) and update status flags.
  void ands(const Register& rd, const Register& rn, const Operand& operand);

  // Bit test and set flags.
  void tst(const Register& rn, const Operand& operand);

  // Bit clear (A & ~B).
  void bic(const Register& rd, const Register& rn, const Operand& operand);

  // Bit clear (A & ~B) and update status flags.
  void bics(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eon/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);

  // Logical shift left by variable.
  void lslv(const Register& rd, const Register& rn, const Register& rm);

  // Logical shift right by variable.
  void lsrv(const Register& rd, const Register& rn, const Register& rm);

  // Arithmetic shift right by variable.
  void asrv(const Register& rd, const Register& rn, const Register& rm);

  // Rotate right by variable.
  void rorv(const Register& rd, const Register& rn, const Register& rm);

  // Bitfield instructions.
  // Bitfield move.
  void bfm(const Register& rd,
           const Register& rn,
           unsigned immr,
           unsigned imms);

  // Signed bitfield move.
  void sbfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Unsigned bitfield move.
  void ubfm(const Register& rd,
            const Register& rn,
            unsigned immr,
            unsigned imms);

  // Bfm aliases.
  // Bitfield insert.
  void bfi(const Register& rd,
           const Register& rn,
           unsigned lsb,
           unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd,
        rn,
        (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
        width - 1);
  }
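
  // Worked example (values assumed): bfi(w0, w1, 8, 4) inserts bits <3:0> of
  // w1 into bits <11:8> of w0. Per the expression above it encodes as
  // bfm(w0, w1, (32 - 8) & 31, 4 - 1), i.e. bfm(w0, w1, 24, 3).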

  // Bitfield extract and insert low.
  void bfxil(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    bfm(rd, rn, lsb, lsb + width - 1);
  }

  // Sbfm aliases.
  // Arithmetic shift right.
  void asr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    sbfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Signed bitfield insert with zero at right.
  void sbfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Signed bitfield extract.
  void sbfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    sbfm(rd, rn, lsb, lsb + width - 1);
  }

  // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); }

  // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); }

  // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); }

  // Ubfm aliases.
  // Logical shift left.
  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    unsigned reg_size = rd.GetSizeInBits();
    VIXL_ASSERT(shift < reg_size);
    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
  }

  // Logical shift right.
  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    VIXL_ASSERT(shift < static_cast<unsigned>(rd.GetSizeInBits()));
    ubfm(rd, rn, shift, rd.GetSizeInBits() - 1);
  }

  // Unsigned bitfield insert with zero at right.
  void ubfiz(const Register& rd,
             const Register& rn,
             unsigned lsb,
             unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd,
         rn,
         (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1),
         width - 1);
  }

  // Unsigned bitfield extract.
  void ubfx(const Register& rd,
            const Register& rn,
            unsigned lsb,
            unsigned width) {
    VIXL_ASSERT(width >= 1);
    VIXL_ASSERT(lsb + width <= static_cast<unsigned>(rn.GetSizeInBits()));
    ubfm(rd, rn, lsb, lsb + width - 1);
  }

  // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); }

  // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); }

  // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); }

  // Extract.
  void extr(const Register& rd,
            const Register& rn,
            const Register& rm,
            unsigned lsb);

  // Conditional select: rd = cond ? rn : rm.
  void csel(const Register& rd,
            const Register& rn,
            const Register& rm,
            Condition cond);

  // Conditional select increment: rd = cond ? rn : rm + 1.
  void csinc(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select inversion: rd = cond ? rn : ~rm.
  void csinv(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional select negation: rd = cond ? rn : -rm.
  void csneg(const Register& rd,
             const Register& rn,
             const Register& rm,
             Condition cond);

  // Conditional set: rd = cond ? 1 : 0.
  void cset(const Register& rd, Condition cond);

  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Rotate right.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn,
            const Operand& operand,
            StatusFlags nzcv,
            Condition cond);

  // CRC-32 checksum from byte.
  void crc32b(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from half-word.
  void crc32h(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from word.
  void crc32w(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32 checksum from double word.
  void crc32x(const Register& wd, const Register& wn, const Register& xm);

  // CRC-32C checksum from byte.
  void crc32cb(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from half-word.
  void crc32ch(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from word.
  void crc32cw(const Register& wd, const Register& wn, const Register& wm);

  // CRC-32C checksum from double word.
  void crc32cx(const Register& wd, const Register& wn, const Register& xm);

  // Multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // Negated multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // Signed long multiply: 32 x 32 -> 64-bit.
  void smull(const Register& xd, const Register& wn, const Register& wm);

  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
  void smulh(const Register& xd, const Register& xn, const Register& xm);

  // Multiply and accumulate.
  void madd(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Multiply and subtract.
  void msub(const Register& rd,
            const Register& rn,
            const Register& rm,
            const Register& ra);

  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void smaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
  void umaddl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply: 32 x 32 -> 64-bit.
  void umull(const Register& xd, const Register& wn, const Register& wm) {
    umaddl(xd, wn, wm, xzr);
  }

  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
  void umulh(const Register& xd, const Register& xn, const Register& xm);

  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void smsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
  void umsubl(const Register& xd,
              const Register& wn,
              const Register& wm,
              const Register& xa);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit reverse.
  void rbit(const Register& rd, const Register& rn);

  // Reverse bytes in 16-bit half words.
  void rev16(const Register& rd, const Register& rn);

  // Reverse bytes in 32-bit words.
  void rev32(const Register& xd, const Register& xn);

  // Reverse bytes.
  void rev(const Register& rd, const Register& rn);

  // Count leading zeroes.
  void clz(const Register& rd, const Register& rn);

  // Count leading sign bits.
  void cls(const Register& rd, const Register& rn);

  // Memory instructions.
  // Load integer or FP register.
  void ldr(const CPURegister& rt,
           const MemOperand& src,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Store integer or FP register.
  void str(const CPURegister& rt,
           const MemOperand& dst,
           LoadStoreScalingOption option = PreferScaledOffset);

  // Load word with sign extension.
  void ldrsw(const Register& xt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte.
  void ldrb(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store byte.
  void strb(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load byte with sign extension.
  void ldrsb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word.
  void ldrh(const Register& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Store half-word.
  void strh(const Register& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferScaledOffset);

  // Load integer or FP register (with unscaled offset).
  void ldur(const CPURegister& rt,
            const MemOperand& src,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store integer or FP register (with unscaled offset).
  void stur(const CPURegister& rt,
            const MemOperand& dst,
            LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load word with sign extension (with unscaled offset).
  void ldursw(const Register& xt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte (with unscaled offset).
  void ldurb(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store byte (with unscaled offset).
  void sturb(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load byte with sign extension (and unscaled offset).
  void ldursb(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word (with unscaled offset).
  void ldurh(const Register& rt,
             const MemOperand& src,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Store half-word (with unscaled offset).
  void sturh(const Register& rt,
             const MemOperand& dst,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load half-word with sign extension (and unscaled offset).
  void ldursh(const Register& rt,
              const MemOperand& src,
              LoadStoreScalingOption option = PreferUnscaledOffset);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt,
           const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src);

  // Load integer or FP register pair, non-temporal.
  void ldnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& src);

  // Store integer or FP register pair, non-temporal.
  void stnp(const CPURegister& rt,
            const CPURegister& rt2,
            const MemOperand& dst);

  // Load integer or FP register from literal pool.
  void ldr(const CPURegister& rt, RawLiteral* literal);

  // Load word with sign extension from literal pool.
  void ldrsw(const Register& xt, RawLiteral* literal);

  // Load integer or FP register from pc + imm19 << 2.
  void ldr(const CPURegister& rt, int64_t imm19);

  // Load word with sign extension from pc + imm19 << 2.
  void ldrsw(const Register& xt, int64_t imm19);

  // Store exclusive byte.
  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive half-word.
  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store exclusive register.
  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load exclusive byte.
  void ldxrb(const Register& rt, const MemOperand& src);

  // Load exclusive half-word.
  void ldxrh(const Register& rt, const MemOperand& src);

  // Load exclusive register.
  void ldxr(const Register& rt, const MemOperand& src);

  // Store exclusive register pair.
  void stxp(const Register& rs,
            const Register& rt,
            const Register& rt2,
            const MemOperand& dst);

  // Load exclusive register pair.
  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);

  // Store-release exclusive register.
  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const MemOperand& src);

  // Load-acquire exclusive register.
  void ldaxr(const Register& rt, const MemOperand& src);

  // Store-release exclusive register pair.
  void stlxp(const Register& rs,
             const Register& rt,
             const Register& rt2,
             const MemOperand& dst);

  // Load-acquire exclusive register pair.
  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);

  // Store-release byte.
  void stlrb(const Register& rt, const MemOperand& dst);

  // Store-release half-word.
  void stlrh(const Register& rt, const MemOperand& dst);

  // Store-release register.
  void stlr(const Register& rt, const MemOperand& dst);

  // Load-acquire byte.
  void ldarb(const Register& rt, const MemOperand& src);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const MemOperand& src);

  // Load-acquire register.
  void ldar(const Register& rt, const MemOperand& src);

  // Prefetch memory.
  void prfm(PrefetchOperation op,
            const MemOperand& addr,
            LoadStoreScalingOption option = PreferScaledOffset);

  // Prefetch memory (with unscaled offset).
  void prfum(PrefetchOperation op,
             const MemOperand& addr,
             LoadStoreScalingOption option = PreferUnscaledOffset);

  // Prefetch memory in the literal pool.
  void prfm(PrefetchOperation op, RawLiteral* literal);

  // Prefetch from pc + imm19 << 2.
  void prfm(PrefetchOperation op, int64_t imm19);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.
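  //
  // An illustrative sketch (constant and register assumed): materializing
  // 0x0000123400005678 in x0 one half word at a time:
  //
  //   masm.movz(x0, 0x5678);      // x0 = 0x0000000000005678
  //   masm.movk(x0, 0x1234, 32);  // x0 = 0x0000123400005678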

  // Move immediate and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move inverted immediate.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move immediate.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Generate exception targeting EL1.
  void svc(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move inverted operand to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& xt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& xt);

  // System instruction.
  void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr);

  // System instruction with pre-encoded op (op1:crn:crm:op2).
  void sys(int op, const Register& xt = xzr);

  // System data cache operation.
  void dc(DataCacheOp op, const Register& rt);

  // System instruction cache operation.
  void ic(InstructionCacheOp op, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Clear exclusive monitor.
  void clrex(int imm4 = 0xf);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Alias for system instructions.
  // No-op.
  void nop() { hint(NOP); }

  // FP and NEON instructions.
  // Move double precision immediate to FP register.
  void fmov(const VRegister& vd, double imm);

  // Move single precision immediate to FP register.
  void fmov(const VRegister& vd, float imm);

  // Move FP register to register.
  void fmov(const Register& rd, const VRegister& fn);

  // Move register to FP register.
  void fmov(const VRegister& vd, const Register& rn);

  // Move FP register to FP register.
  void fmov(const VRegister& vd, const VRegister& fn);

  // Move 64-bit register to top half of 128-bit FP register.
  void fmov(const VRegister& vd, int index, const Register& rn);

  // Move top half of 128-bit FP register to 64-bit register.
  void fmov(const Register& rd, const VRegister& vn, int index);

  // FP add.
  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP subtract.
  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply.
  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add.
  void fmadd(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-subtract.
  void fmsub(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-add and negate.
  void fnmadd(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP fused multiply-subtract and negate.
  void fnmsub(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              const VRegister& va);

  // FP multiply-negate scalar.
  void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal exponent scalar.
  void frecpx(const VRegister& vd, const VRegister& vn);

  // FP divide.
  void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum.
  void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum.
  void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP maximum number.
  void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP minimum number.
  void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);

  // FP absolute.
  void fabs(const VRegister& vd, const VRegister& vn);

  // FP negate.
  void fneg(const VRegister& vd, const VRegister& vn);

  // FP square root.
  void fsqrt(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding.
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer, nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer, toward plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding.
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer, towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap);

  void FPCompareMacro(const VRegister& vn,
                      const VRegister& vm,
                      FPTrapFlags trap);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  void FPCCompareMacro(const VRegister& vn,
                       const VRegister& vm,
                       StatusFlags nzcv,
                       Condition cond,
                       FPTrapFlags trap);

  // FP conditional compare.
  void fccmp(const VRegister& vn,
             const VRegister& vm,
             StatusFlags nzcv,
             Condition cond);

  // FP signaling compare registers.
  void fcmpe(const VRegister& vn, const VRegister& vm);

  // FP signaling compare immediate.
  void fcmpe(const VRegister& vn, double value);

  // FP conditional signaling compare.
  void fccmpe(const VRegister& vn,
              const VRegister& vm,
              StatusFlags nzcv,
              Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);
1436 
1437   // FP convert to lower precision, rounding to odd.
1438   void fcvtxn(const VRegister& vd, const VRegister& vn);
1439 
1440   // FP convert to lower precision, rounding to odd (second part).
1441   void fcvtxn2(const VRegister& vd, const VRegister& vn);
1442 
1443   // FP convert to signed integer, nearest with ties to away.
1444   void fcvtas(const Register& rd, const VRegister& vn);
1445 
1446   // FP convert to unsigned integer, nearest with ties to away.
1447   void fcvtau(const Register& rd, const VRegister& vn);
1448 
1449   // FP convert to signed integer, nearest with ties to away.
1450   void fcvtas(const VRegister& vd, const VRegister& vn);
1451 
1452   // FP convert to unsigned integer, nearest with ties to away.
1453   void fcvtau(const VRegister& vd, const VRegister& vn);
1454 
1455   // FP convert to signed integer, round towards -infinity.
1456   void fcvtms(const Register& rd, const VRegister& vn);
1457 
1458   // FP convert to unsigned integer, round towards -infinity.
1459   void fcvtmu(const Register& rd, const VRegister& vn);
1460 
1461   // FP convert to signed integer, round towards -infinity.
1462   void fcvtms(const VRegister& vd, const VRegister& vn);
1463 
1464   // FP convert to unsigned integer, round towards -infinity.
1465   void fcvtmu(const VRegister& vd, const VRegister& vn);
1466 
1467   // FP convert to signed integer, nearest with ties to even.
1468   void fcvtns(const Register& rd, const VRegister& vn);
1469 
1470   // FP convert to unsigned integer, nearest with ties to even.
1471   void fcvtnu(const Register& rd, const VRegister& vn);
1472 
1473   // FP convert to signed integer, nearest with ties to even.
1474   void fcvtns(const VRegister& rd, const VRegister& vn);
1475 
1476   // FP convert to unsigned integer, nearest with ties to even.
1477   void fcvtnu(const VRegister& rd, const VRegister& vn);
1478 
1479   // FP convert to signed integer or fixed-point, round towards zero.
1480   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
1481 
1482   // FP convert to unsigned integer or fixed-point, round towards zero.
1483   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
1484 
1485   // FP convert to signed integer or fixed-point, round towards zero.
1486   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
1487 
1488   // FP convert to unsigned integer or fixed-point, round towards zero.
1489   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
1490 
1491   // FP convert to signed integer, round towards +infinity.
1492   void fcvtps(const Register& rd, const VRegister& vn);
1493 
1494   // FP convert to unsigned integer, round towards +infinity.
1495   void fcvtpu(const Register& rd, const VRegister& vn);
1496 
1497   // FP convert to signed integer, round towards +infinity.
1498   void fcvtps(const VRegister& vd, const VRegister& vn);
1499 
1500   // FP convert to unsigned integer, round towards +infinity.
1501   void fcvtpu(const VRegister& vd, const VRegister& vn);
1502 
1503   // Convert signed integer or fixed point to FP.
1504   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
1505 
1506   // Convert unsigned integer or fixed point to FP.
1507   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
1508 
1509   // Convert signed integer or fixed-point to FP.
1510   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
1511 
1512   // Convert unsigned integer or fixed-point to FP.
1513   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
1514 
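  // Usage sketch (illustrative): round-tripping a Q16.16 fixed-point value
  // through a general-purpose register:
  //
  //   assm.fcvtzs(x0, d0, 16);  // x0 = (int64_t)(d0 * 65536), round to zero.
  //   assm.scvtf(d1, x0, 16);   // d1 = (double)x0 / 65536.
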
  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding halving add.
  void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving subtract.
  void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving subtract.
  void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating add.
  void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating add.
  void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating subtract.
  void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating subtract.
  void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pairwise.
  void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add pair of elements (scalar).
  void addp(const VRegister& vd, const VRegister& vn);

  // Multiply-add to accumulator.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply-subtract to accumulator.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int vm_index);

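  // Worked example (illustrative): with S lanes and vm_index = 3, the
  // by-element mla computes vd.S[i] += vn.S[i] * vm.S[3] for every lane i,
  // broadcasting the selected vm lane across the multiplication.
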
  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply-subtract by scalar element.
  void smlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply-subtract by scalar element (second part).
  void smlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-subtract by scalar element.
  void umlsl(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-subtract by scalar element (second part).
  void umlsl2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd,
              const VRegister& vn,
              const VRegister& vm,
              int vm_index);

  // Signed saturating doubling long multiply by element.
  void sqdmull(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply by element (second part).
  void sqdmull2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-subtract by element.
  void sqdmlsl(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-subtract by element
  // (second part).
  void sqdmlsl2(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare equal to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

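  // Semantics sketch (illustrative): the integer lane compares write
  // all-ones to a destination lane where the relation holds and all-zeros
  // otherwise, e.g. cmgt sets vd.lane[i] = (vn.lane[i] > vm.lane[i]) ? ~0 : 0,
  // which is the usual mask form consumed by bsl/bit/bif.
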
  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise and.
  void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or.
  void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise or immediate.
  void orr(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Move register to register.
  void mov(const VRegister& vd, const VRegister& vn);

  // Bitwise or complement (orn).
  void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise exclusive or.
  void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bit clear immediate.
  void bic(const VRegister& vd, const int imm8, const int left_shift = 0);

  // Bit clear.
  void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if false.
  void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise insert if true.
  void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Bitwise select.
  void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply.
  void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Vector move immediate.
  void movi(const VRegister& vd,
            const uint64_t imm,
            Shift shift = LSL,
            const int shift_amount = 0);

  // Bitwise not.
  void mvn(const VRegister& vd, const VRegister& vn);

  // Vector move inverted immediate.
  void mvni(const VRegister& vd,
            const int imm8,
            Shift shift = LSL,
            const int shift_amount = 0);

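  // Usage sketch (illustrative): materialising vector constants:
  //
  //   assm.movi(v0.V4S(), 0xff, LSL, 8);  // Each S lane = 0x0000ff00.
  //   assm.mvni(v1.V8H(), 0x01);          // Each H lane = ~0x0001 = 0xfffe.
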
  // Signed saturating accumulate of unsigned value.
  void suqadd(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating accumulate of signed value.
  void usqadd(const VRegister& vd, const VRegister& vn);

  // Absolute value.
  void abs(const VRegister& vd, const VRegister& vn);

  // Signed saturating absolute value.
  void sqabs(const VRegister& vd, const VRegister& vn);

  // Negate.
  void neg(const VRegister& vd, const VRegister& vn);

  // Signed saturating negate.
  void sqneg(const VRegister& vd, const VRegister& vn);

  // Bitwise not.
  void not_(const VRegister& vd, const VRegister& vn);

  // Extract narrow.
  void xtn(const VRegister& vd, const VRegister& vn);

  // Extract narrow (second part).
  void xtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow.
  void sqxtn(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract narrow (second part).
  void sqxtn2(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow.
  void uqxtn(const VRegister& vd, const VRegister& vn);

  // Unsigned saturating extract narrow (second part).
  void uqxtn2(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow.
  void sqxtun(const VRegister& vd, const VRegister& vn);

  // Signed saturating extract unsigned narrow (second part).
  void sqxtun2(const VRegister& vd, const VRegister& vn);

  // Extract vector from pair of vectors.
  void ext(const VRegister& vd,
           const VRegister& vn,
           const VRegister& vm,
           int index);

  // Duplicate vector element to vector or scalar.
  void dup(const VRegister& vd, const VRegister& vn, int vn_index);

  // Move vector element to scalar.
  void mov(const VRegister& vd, const VRegister& vn, int vn_index);

  // Duplicate general-purpose register to vector.
  void dup(const VRegister& vd, const Register& rn);

  // Insert vector element from another vector element.
  void ins(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Move vector element to another vector element.
  void mov(const VRegister& vd,
           int vd_index,
           const VRegister& vn,
           int vn_index);

  // Insert vector element from general-purpose register.
  void ins(const VRegister& vd, int vd_index, const Register& rn);

  // Move general-purpose register to a vector element.
  void mov(const VRegister& vd, int vd_index, const Register& rn);

  // Unsigned move vector element to general-purpose register.
  void umov(const Register& rd, const VRegister& vn, int vn_index);

  // Move vector element to general-purpose register.
  void mov(const Register& rd, const VRegister& vn, int vn_index);

  // Signed move vector element to general-purpose register.
  void smov(const Register& rd, const VRegister& vn, int vn_index);

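  // Usage sketch (illustrative): dup(v0.V4S(), w1) broadcasts w1 into every
  // S lane; ins then overwrites a single lane from a general-purpose
  // register, while umov/smov read one lane back out, zero-extended or
  // sign-extended respectively (e.g. w3 = v0.S[1]).
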
  // One-element structure load to one register.
  void ld1(const VRegister& vt, const MemOperand& src);

  // One-element structure load to two registers.
  void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure load to three registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure load to four registers.
  void ld1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure load to one lane.
  void ld1(const VRegister& vt, int lane, const MemOperand& src);

  // One-element single structure load to all lanes.
  void ld1r(const VRegister& vt, const MemOperand& src);

  // Two-element structure load.
  void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure load to one lane.
  void ld2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Two-element single structure load to all lanes.
  void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Three-element structure load.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure load to one lane.
  void ld3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Three-element single structure load to all lanes.
  void ld3r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const MemOperand& src);

  // Four-element structure load.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure load to one lane.
  void ld4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Four-element single structure load to all lanes.
  void ld4r(const VRegister& vt,
            const VRegister& vt2,
            const VRegister& vt3,
            const VRegister& vt4,
            const MemOperand& src);

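  // Usage sketch (illustrative): de-interleaving structure loads:
  //
  //   assm.ld2(v0.V4S(), v1.V4S(), MemOperand(x0));  // Even lanes -> v0,
  //                                                  // odd lanes  -> v1.
  //   assm.ld1r(v2.V8B(), MemOperand(x1));           // Splat the byte at
  //                                                  // [x1] to all lanes.
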
  // Count leading sign bits.
  void cls(const VRegister& vd, const VRegister& vn);

  // Count leading zero bits (vector).
  void clz(const VRegister& vd, const VRegister& vn);

  // Population count per byte.
  void cnt(const VRegister& vd, const VRegister& vn);

  // Reverse bit order.
  void rbit(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 16-bit halfwords.
  void rev16(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 32-bit words.
  void rev32(const VRegister& vd, const VRegister& vn);

  // Reverse elements in 64-bit doublewords.
  void rev64(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal square root estimate.
  void ursqrte(const VRegister& vd, const VRegister& vn);

  // Unsigned reciprocal estimate.
  void urecpe(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add.
  void saddlp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add.
  void uaddlp(const VRegister& vd, const VRegister& vn);

  // Signed pairwise long add and accumulate.
  void sadalp(const VRegister& vd, const VRegister& vn);

  // Unsigned pairwise long add and accumulate.
  void uadalp(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate.
  void shl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left by immediate.
  void sqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift left unsigned by immediate.
  void sqshlu(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift left by immediate.
  void uqshl(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate.
  void sshll(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift left long by immediate (second part).
  void sshll2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed extend long.
  void sxtl(const VRegister& vd, const VRegister& vn);

  // Signed extend long (second part).
  void sxtl2(const VRegister& vd, const VRegister& vn);

  // Unsigned shift left long by immediate.
  void ushll(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift left long by immediate (second part).
  void ushll2(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size.
  void shll(const VRegister& vd, const VRegister& vn, int shift);

  // Shift left long by element size (second part).
  void shll2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned extend long.
  void uxtl(const VRegister& vd, const VRegister& vn);

  // Unsigned extend long (second part).
  void uxtl2(const VRegister& vd, const VRegister& vn);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise minimum.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);

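  // Usage sketch (illustrative): horizontal reductions into a scalar lane:
  //
  //   assm.addv(b0, v1.V16B());  // b0 = sum of all byte lanes of v1.
  //   assm.smaxv(h2, v3.V8H());  // h2 = maximum signed halfword lane.
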
  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt,
           const VRegister& vt2,
           int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           int lane,
           const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt,
           const VRegister& vt2,
           const VRegister& vt3,
           const VRegister& vt4,
           int lane,
           const MemOperand& src);

  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate
  // (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right unsigned narrow by immediate
  // (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);

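  // Usage sketch (illustrative): narrowing with rounding and saturation:
  //
  //   assm.sqrshrn(v0.V4H(), v1.V4S(), 16);   // Narrow into the low half.
  //   assm.sqrshrn2(v0.V8H(), v2.V4S(), 16);  // Then fill the high half.
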
  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-subtract (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-subtract (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd,
               const VRegister& vn,
               const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                int vm_index);

  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply accumulate.
  void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply subtract.
  void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP vector multiply extended.
  void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than or equal.
  void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute greater than.
  void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply by element.
  void fmul(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-add to accumulator by element.
  void fmla(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP fused multiply-subtract from accumulator by element.
  void fmls(const VRegister& vd,
            const VRegister& vn,
            const VRegister& vm,
            int vm_index);

  // FP multiply extended by element.
  void fmulx(const VRegister& vd,
             const VRegister& vn,
             const VRegister& vm,
             int vm_index);

  // FP compare equal.
  void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than.
  void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP greater than or equal.
  void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP compare equal to zero.
  void fcmeq(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than zero.
  void fcmgt(const VRegister& vd, const VRegister& vn, double imm);

  // FP greater than or equal to zero.
  void fcmge(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than or equal to zero.
  void fcmle(const VRegister& vd, const VRegister& vn, double imm);

  // FP less than zero.
  void fcmlt(const VRegister& vd, const VRegister& vn, double imm);

  // FP absolute difference.
  void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add vector.
  void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise add scalar.
  void faddp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum vector.
  void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum scalar.
  void fmaxp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum vector.
  void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum scalar.
  void fminp(const VRegister& vd, const VRegister& vn);

  // FP pairwise maximum number vector.
  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise maximum number scalar.
  void fmaxnmp(const VRegister& vd, const VRegister& vn);

  // FP pairwise minimum number vector.
  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum number scalar.
  void fminnmp(const VRegister& vd, const VRegister& vn);

  // Emit generic instructions.
  // Emit raw instructions into the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 32 bits of data into the instruction stream.
  void dc32(uint32_t data) { dc(data); }

  // Emit 64 bits of data into the instruction stream.
  void dc64(uint64_t data) { dc(data); }

  // Emit data in the instruction stream.
  template <typename T>
  void dc(T data) {
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit<T>(data);
  }

  // Copy a string into the instruction stream, including the terminating NULL
  // character. The instruction pointer is then aligned correctly for
  // subsequent instructions.
  void EmitString(const char* string) {
    VIXL_ASSERT(string != NULL);
    VIXL_ASSERT(AllowAssembler());

    GetBuffer()->EmitString(string);
    GetBuffer()->Align();
  }

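  // Usage sketch (illustrative): emitting raw encodings and literal data:
  //
  //   assm.dci(0xd503201f);  // Raw encoding of NOP.
  //   assm.dc32(0xdeadbeef);
  //   assm.dc64(UINT64_C(0x0123456789abcdef));
  //   assm.EmitString("vixl");  // Bytes plus NUL, then realign.
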
  // Code generation helpers.

  // Register encoding.
  static Instr Rd(CPURegister rd) {
    VIXL_ASSERT(rd.GetCode() != kSPRegInternalCode);
    return rd.GetCode() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    VIXL_ASSERT(rn.GetCode() != kSPRegInternalCode);
    return rn.GetCode() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    return rm.GetCode() << Rm_offset;
  }

  static Instr RmNot31(CPURegister rm) {
    VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
    VIXL_ASSERT(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    VIXL_ASSERT(ra.GetCode() != kSPRegInternalCode);
    return ra.GetCode() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    VIXL_ASSERT(rt.GetCode() != kSPRegInternalCode);
    return rt.GetCode() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    VIXL_ASSERT(rt2.GetCode() != kSPRegInternalCode);
    return rt2.GetCode() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    VIXL_ASSERT(rs.GetCode() != kSPRegInternalCode);
    return rs.GetCode() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register.
  static Instr RdSP(Register rd) {
    VIXL_ASSERT(!rd.IsZero());
    return (rd.GetCode() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    VIXL_ASSERT(!rn.IsZero());
    return (rn.GetCode() & kRegCodeMask) << Rn_offset;
  }

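  // Encoding sketch (illustrative; kAddXOpcode is a hypothetical base opcode
  // constant, not part of VIXL): each helper shifts a field into place, and
  // a full instruction word is built by OR-ing the fields together:
  //
  //   Instr insn = kAddXOpcode | SF(x0) | Rd(x0) | Rn(x1) | Rm(x2);
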
  // Flags encoding.
  static Instr Flags(FlagsUpdate S) {
    if (S == SetFlags) {
      return 1 << FlagsUpdate_offset;
    } else if (S == LeaveFlags) {
      return 0 << FlagsUpdate_offset;
    }
    VIXL_UNREACHABLE();
    return 0;
  }

  static Instr Cond(Condition cond) { return cond << Condition_offset; }

  // PC-relative address encoding.
  static Instr ImmPCRelAddress(int64_t imm21) {
    VIXL_ASSERT(IsInt21(imm21));
    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
    Instr immlo = imm << ImmPCRelLo_offset;
    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
  }

2657 
2658   // Branch encoding.
ImmUncondBranch(int64_t imm26)2659   static Instr ImmUncondBranch(int64_t imm26) {
2660     VIXL_ASSERT(IsInt26(imm26));
2661     return TruncateToUint26(imm26) << ImmUncondBranch_offset;
2662   }
2663 
ImmCondBranch(int64_t imm19)2664   static Instr ImmCondBranch(int64_t imm19) {
2665     VIXL_ASSERT(IsInt19(imm19));
2666     return TruncateToUint19(imm19) << ImmCondBranch_offset;
2667   }
2668 
ImmCmpBranch(int64_t imm19)2669   static Instr ImmCmpBranch(int64_t imm19) {
2670     VIXL_ASSERT(IsInt19(imm19));
2671     return TruncateToUint19(imm19) << ImmCmpBranch_offset;
2672   }
2673 
ImmTestBranch(int64_t imm14)2674   static Instr ImmTestBranch(int64_t imm14) {
2675     VIXL_ASSERT(IsInt14(imm14));
2676     return TruncateToUint14(imm14) << ImmTestBranch_offset;
2677   }
2678 
ImmTestBranchBit(unsigned bit_pos)2679   static Instr ImmTestBranchBit(unsigned bit_pos) {
2680     VIXL_ASSERT(IsUint6(bit_pos));
2681     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
2682     unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
2683     unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
2684     b5 &= ImmTestBranchBit5_mask;
2685     b40 &= ImmTestBranchBit40_mask;
2686     return b5 | b40;
2687   }
2688 
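  // Worked example (illustrative): for bit_pos = 37 (0b100101), b5 carries
  // the top bit (1) and b40 carries the low five bits (0b00101), so TBZ/TBNZ
  // can address all 64 bit positions through a split 1+5 bit field.
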
  // Data Processing encoding.
  static Instr SF(Register rd) {
    return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
  }

  static Instr ImmAddSub(int imm) {
    VIXL_ASSERT(IsImmAddSub(imm));
    if (IsUint12(imm)) {  // No shift required.
      imm <<= ImmAddSub_offset;
    } else {
      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
    }
    return imm;
  }

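  // Worked example (illustrative): ImmAddSub(0x1000) does not fit in twelve
  // bits, so it is encoded as imm12 = 0x1 with the shift bit set (LSL #12).
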
  static Instr ImmS(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
                ((reg_size == kWRegSize) && IsUint5(imms)));
    USE(reg_size);
    return imms << ImmS_offset;
  }

  static Instr ImmR(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    VIXL_ASSERT(IsUint6(immr));
    return immr << ImmR_offset;
  }

  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(IsUint6(imms));
    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
    USE(reg_size);
    return imms << ImmSetBits_offset;
  }

  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
                ((reg_size == kWRegSize) && IsUint5(immr)));
    USE(reg_size);
    return immr << ImmRotate_offset;
  }

  static Instr ImmLLiteral(int64_t imm19) {
    VIXL_ASSERT(IsInt19(imm19));
    return TruncateToUint19(imm19) << ImmLLiteral_offset;
  }

  static Instr BitN(unsigned bitn, unsigned reg_size) {
    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
    USE(reg_size);
    return bitn << BitN_offset;
  }

  static Instr ShiftDP(Shift shift) {
    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
    return shift << ShiftDP_offset;
  }

  static Instr ImmDPShift(unsigned amount) {
    VIXL_ASSERT(IsUint6(amount));
    return amount << ImmDPShift_offset;
  }

  static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; }

  static Instr ImmExtendShift(unsigned left_shift) {
    VIXL_ASSERT(left_shift <= 4);
    return left_shift << ImmExtendShift_offset;
  }

  static Instr ImmCondCmp(unsigned imm) {
    VIXL_ASSERT(IsUint5(imm));
    return imm << ImmCondCmp_offset;
  }

  static Instr Nzcv(StatusFlags nzcv) {
    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
  }

  // MemOperand offset encoding.
  static Instr ImmLSUnsigned(int64_t imm12) {
    VIXL_ASSERT(IsUint12(imm12));
    return TruncateToUint12(imm12) << ImmLSUnsigned_offset;
  }

  static Instr ImmLS(int64_t imm9) {
    VIXL_ASSERT(IsInt9(imm9));
    return TruncateToUint9(imm9) << ImmLS_offset;
  }

  static Instr ImmLSPair(int64_t imm7, unsigned access_size) {
    VIXL_ASSERT(IsMultiple(imm7, 1 << access_size));
    int64_t scaled_imm7 = imm7 / (1 << access_size);
    VIXL_ASSERT(IsInt7(scaled_imm7));
    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
  }

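  // Worked example (illustrative): for an X-register LDP/STP, access_size is
  // 3 (8-byte units), so a byte offset of -16 is scaled to imm7 = -2 before
  // being truncated into the seven-bit field.
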
  static Instr ImmShiftLS(unsigned shift_amount) {
    VIXL_ASSERT(IsUint1(shift_amount));
    return shift_amount << ImmShiftLS_offset;
  }

  static Instr ImmPrefetchOperation(int imm5) {
    VIXL_ASSERT(IsUint5(imm5));
    return imm5 << ImmPrefetchOperation_offset;
  }

  static Instr ImmException(int imm16) {
    VIXL_ASSERT(IsUint16(imm16));
    return imm16 << ImmException_offset;
  }

  static Instr ImmSystemRegister(int imm15) {
    VIXL_ASSERT(IsUint15(imm15));
    return imm15 << ImmSystemRegister_offset;
  }

  static Instr ImmHint(int imm7) {
    VIXL_ASSERT(IsUint7(imm7));
    return imm7 << ImmHint_offset;
  }

  static Instr CRm(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRm_offset;
  }

  static Instr CRn(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << CRn_offset;
  }

  static Instr SysOp(int imm14) {
    VIXL_ASSERT(IsUint14(imm14));
    return imm14 << SysOp_offset;
  }

  static Instr ImmSysOp1(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp1_offset;
  }

  static Instr ImmSysOp2(int imm3) {
    VIXL_ASSERT(IsUint3(imm3));
    return imm3 << SysOp2_offset;
  }

  static Instr ImmBarrierDomain(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierDomain_offset;
  }

  static Instr ImmBarrierType(int imm2) {
    VIXL_ASSERT(IsUint2(imm2));
    return imm2 << ImmBarrierType_offset;
  }

  // Move immediates encoding.
  static Instr ImmMoveWide(uint64_t imm) {
    VIXL_ASSERT(IsUint16(imm));
    return static_cast<Instr>(imm << ImmMoveWide_offset);
  }

  static Instr ShiftMoveWide(int64_t shift) {
    VIXL_ASSERT(IsUint2(shift));
    return static_cast<Instr>(shift << ShiftMoveWide_offset);
  }

  // FP Immediates.
  static Instr ImmFP32(float imm);
  static Instr ImmFP64(double imm);

2866   // FP register type.
FPType(FPRegister fd)2867   static Instr FPType(FPRegister fd) { return fd.Is64Bits() ? FP64 : FP32; }
2868 
FPScale(unsigned scale)2869   static Instr FPScale(unsigned scale) {
2870     VIXL_ASSERT(IsUint6(scale));
2871     return scale << FPScale_offset;
2872   }
2873 
2874   // Immediate field checking helpers.
2875   static bool IsImmAddSub(int64_t immediate);
2876   static bool IsImmConditionalCompare(int64_t immediate);
2877   static bool IsImmFP32(float imm);
2878   static bool IsImmFP64(double imm);
2879   static bool IsImmLogical(uint64_t value,
2880                            unsigned width,
2881                            unsigned* n = NULL,
2882                            unsigned* imm_s = NULL,
2883                            unsigned* imm_r = NULL);
2884   static bool IsImmLSPair(int64_t offset, unsigned access_size);
2885   static bool IsImmLSScaled(int64_t offset, unsigned access_size);
2886   static bool IsImmLSUnscaled(int64_t offset);
2887   static bool IsImmMovn(uint64_t imm, unsigned reg_size);
2888   static bool IsImmMovz(uint64_t imm, unsigned reg_size);
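
  // Worked example (a sketch, assuming the standard AArch64 bitmask-immediate
  // rules): 0x0f0f0f0f0f0f0f0f is a repeating 8-bit pattern containing one
  // contiguous run of set bits per element, so it is encodable:
  //   bool ok = IsImmLogical(UINT64_C(0x0f0f0f0f0f0f0f0f), kXRegSize);  // true
  // whereas an arbitrary value such as 0x123456789abcdef0 is not.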

  // Instruction bits for vector format in data processing operations.
  static Instr VFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2S;
        case 4:
          return NEON_4H;
        case 8:
          return NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return NEON_2D;
        case 4:
          return NEON_4S;
        case 8:
          return NEON_8H;
        case 16:
          return NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }
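
  // For example (illustrative, assuming the VRegister::V4S() view used
  // elsewhere in VIXL): a 128-bit register with four lanes holds four 32-bit
  // elements, so VFormat(v0.V4S()) yields NEON_4S; unsupported lane counts
  // fall through to the 0xffffffff marker.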

  // Instruction bits for vector format in floating point data processing
  // operations.
  static Instr FPFormat(VRegister vd) {
    if (vd.GetLanes() == 1) {
      // Floating point scalar formats.
      VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
      return vd.Is64Bits() ? FP64 : FP32;
    }

    // Two lane floating point vector formats.
    if (vd.GetLanes() == 2) {
      VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
    }

    // Four lane floating point vector format.
    VIXL_ASSERT((vd.GetLanes() == 4) && vd.Is128Bits());
    return NEON_FP_4S;
  }

  // Instruction bits for vector format in load and store operations.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.GetLanes()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          return 0xffffffff;
      }
    } else {
      VIXL_ASSERT(vd.Is128Bits());
      switch (vd.GetLanes()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          return 0xffffffff;
      }
    }
  }

  // Instruction bits for scalar format in data processing operations.
  static Instr SFormat(VRegister vd) {
    VIXL_ASSERT(vd.GetLanes() == 1);
    switch (vd.GetSizeInBytes()) {
      case 1:
        return NEON_B;
      case 2:
        return NEON_H;
      case 4:
        return NEON_S;
      case 8:
        return NEON_D;
      default:
        return 0xffffffff;
    }
  }

  static Instr ImmNEONHLM(int index, int num_bits) {
    int h, l, m;
    if (num_bits == 3) {
      VIXL_ASSERT(IsUint3(index));
      h = (index >> 2) & 1;
      l = (index >> 1) & 1;
      m = (index >> 0) & 1;
    } else if (num_bits == 2) {
      VIXL_ASSERT(IsUint2(index));
      h = (index >> 1) & 1;
      l = (index >> 0) & 1;
      m = 0;
    } else {
      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
      h = (index >> 0) & 1;
      l = 0;
      m = 0;
    }
    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
  }
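
  // Worked example (illustrative): for a by-element operation addressed with
  // num_bits == 3, lane index 5 (0b101) splits as h = 1, l = 0, m = 1, which
  // is then scattered into the H, L and M bit positions of the instruction.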

  static Instr ImmNEONExt(int imm4) {
    VIXL_ASSERT(IsUint4(imm4));
    return imm4 << ImmNEONExt_offset;
  }

  static Instr ImmNEON5(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm5 = (index << (s + 1)) | (1 << s);
    return imm5 << ImmNEON5_offset;
  }
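
  // Worked example (illustrative): for S lanes (s == 2), index 1 gives
  //   imm5 = (1 << 3) | (1 << 2) = 0b01100
  // i.e. a single set bit marks the lane size and the index sits in the bits
  // above it.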

  static Instr ImmNEON4(Instr format, int index) {
    VIXL_ASSERT(IsUint4(index));
    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
    int imm4 = index << s;
    return imm4 << ImmNEON4_offset;
  }

  static Instr ImmNEONabcdefgh(int imm8) {
    VIXL_ASSERT(IsUint8(imm8));
    Instr instr;
    instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
    return instr;
  }
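
  // Worked example (illustrative): imm8 = 0xab = 0b10101011 splits into
  // abc = 0b101 (the top three bits) and defgh = 0b01011 (the low five),
  // matching the a:b:c and d:e:f:g:h fields of the NEON modified-immediate
  // encodings.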

  static Instr NEONCmode(int cmode) {
    VIXL_ASSERT(IsUint4(cmode));
    return cmode << NEONCmode_offset;
  }

  static Instr NEONModImmOp(int op) {
    VIXL_ASSERT(IsUint1(op));
    return op << NEONModImmOp_offset;
  }

  // Return the size of the code generated from `label` to the current
  // position.
  size_t GetSizeOfCodeGeneratedSince(Label* label) const {
    VIXL_ASSERT(label->IsBound());
    return GetBuffer().GetOffsetFrom(label->GetLocation());
  }
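
  // Usage sketch (illustrative; assumes a label bound with this assembler's
  // bind()):
  //   Label start;
  //   assm.bind(&start);
  //   ...  // emit some instructions
  //   size_t bytes = assm.GetSizeOfCodeGeneratedSince(&start);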
  VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince",
                  size_t SizeOfCodeGeneratedSince(Label* label) const) {
    return GetSizeOfCodeGeneratedSince(label);
  }

  VIXL_DEPRECATED("GetBuffer().GetCapacity()",
                  size_t GetBufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }
  VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) {
    return GetBuffer().GetCapacity();
  }

  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t GetRemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }
  VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()",
                  size_t RemainingBufferSpace() const) {
    return GetBuffer().GetRemainingBytes();
  }

  PositionIndependentCodeOption GetPic() const { return pic_; }
  VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) {
    return GetPic();
  }

  bool AllowPageOffsetDependentCode() const {
    return (GetPic() == PageOffsetDependentCode) ||
           (GetPic() == PositionDependentCode);
  }

  static Register AppropriateZeroRegFor(const CPURegister& reg) {
    return reg.Is64Bits() ? Register(xzr) : Register(wzr);
  }

 protected:
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op,
                 LoadStoreScalingOption option = PreferScaledOffset);

  void LoadStorePair(const CPURegister& rt,
                     const CPURegister& rt2,
                     const MemOperand& addr,
                     LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt,
                       const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt,
                        int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt,
                             uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt,
                             const MemOperand& addr,
                             Instr op);

  void Prefetch(PrefetchOperation op,
                const MemOperand& addr,
                LoadStoreScalingOption option = PreferScaledOffset);

  // TODO(all): The third parameter should be passed by reference, but that
  // makes gcc 4.8.2 report a bogus uninitialised warning.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);


  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  void NEONTable(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEONTableOp op);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static LoadStoreOp LoadOpFor(const CPURegister& rt);
  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                       const CPURegister& rt2);
  static LoadStoreOp StoreOpFor(const CPURegister& rt);
  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                        const CPURegister& rt2);
  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
      const CPURegister& rt, const CPURegister& rt2);
  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);


 private:
  static uint32_t FP32ToImm8(float imm);
  static uint32_t FP64ToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void LoadStorePairNonTemporal(const CPURegister& rt,
                                const CPURegister& rt2,
                                const MemOperand& addr,
                                LoadStorePairNonTemporalOp op);
  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd,
                               const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd,
                               const VRegister& fn,
                               const VRegister& fm,
                               const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd,
                        const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd,
                       const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd,
                               const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd,
                               const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop);
  void NEON3Same(const VRegister& vd,
                 const VRegister& vn,
                 const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd,
                   const VRegister& vn,
                   const VRegister& vm,
                   Instr op);
  void NEON3DifferentL(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd,
                        const VRegister& vn,
                        const VRegister& vm,
                        NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd,
                      const VRegister& vn,
                      NEON2RegMiscOp vop,
                      double value = 0.0);
  void NEON2RegMisc(const VRegister& vd,
                    const VRegister& vn,
                    NEON2RegMiscOp vop,
                    int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd,
                const VRegister& vn,
                const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd,
                       const VRegister& vn,
                       const VRegister& vm,
                       int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd,
                     const VRegister& vn,
                     const VRegister& vm,
                     int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd,
                      const VRegister& vn,
                      const VRegister& vm,
                      int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd,
                          const VRegister& vn,
                          NEONShiftImmediateOp op,
                          int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd,
                              const VRegister& vn,
                              int shift,
                              NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd,
                               const VRegister& vn,
                               int shift,
                               NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd,
                           const VRegister& vn,
                           int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Encode the specified MemOperand for the specified access size and scaling
  // preference.
  Instr LoadStoreMemOperand(const MemOperand& addr,
                            unsigned access_size,
                            LoadStoreScalingOption option);

  // Link the current (not-yet-emitted) instruction to the specified label,
  // then return an offset to be encoded in the instruction. If the label is
  // not yet bound, an offset of 0 is returned.
  ptrdiff_t LinkAndGetByteOffsetTo(Label* label);
  ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label);
  ptrdiff_t LinkAndGetPageOffsetTo(Label* label);

  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
  template <int element_shift>
  ptrdiff_t LinkAndGetOffsetTo(Label* label);
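
  // Note (an inference from the wrapper names above, stated as an assumption):
  // element_shift selects the granularity of the returned offset, e.g. 0 for
  // byte offsets, kInstructionSizeLog2 for instruction offsets and
  // kPageSizeLog2 for page offsets, so a single template serves all three
  // LinkAndGet<Type>OffsetTo variants.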

  // Literal load offsets are in words (32-bit).
  ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal);

  // Emit the instruction in buffer_.
  void Emit(Instr instruction) {
    VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize);
    VIXL_ASSERT(AllowAssembler());
    GetBuffer()->Emit32(instruction);
  }

  PositionIndependentCodeOption pic_;
};


template <typename T>
void Literal<T>::UpdateValue(T new_value, const Assembler* assembler) {
  return UpdateValue(new_value,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


template <typename T>
void Literal<T>::UpdateValue(T high64, T low64, const Assembler* assembler) {
  return UpdateValue(high64,
                     low64,
                     assembler->GetBuffer().GetStartAddress<uint8_t*>());
}


}  // namespace aarch64

// Required InvalSet template specialisations.
// TODO: These template specialisations should not live in this file.  Move
// Label out of the aarch64 namespace in order to share its implementation
// later.
#define INVAL_SET_TEMPLATE_PARAMETERS                                \
  ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t,         \
      aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \
      aarch64::Label::kReclaimFactor
template <>
inline ptrdiff_t InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::GetKey(
    const ptrdiff_t& element) {
  return element;
}
template <>
inline void InvalSet<INVAL_SET_TEMPLATE_PARAMETERS>::SetKey(ptrdiff_t* element,
                                                            ptrdiff_t key) {
  *element = key;
}
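
// Note (rationale, added as an observation): each link is stored as a raw
// ptrdiff_t offset, so an element acts as its own key and both
// specialisations reduce to the identity operation.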
#undef INVAL_SET_TEMPLATE_PARAMETERS

}  // namespace vixl

#endif  // VIXL_AARCH64_ASSEMBLER_AARCH64_H_