• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ARM64_ASSEMBLER_ARM64_H_
6 #define V8_ARM64_ASSEMBLER_ARM64_H_
7 
8 #include <deque>
9 #include <list>
10 #include <map>
11 #include <vector>
12 
13 #include "src/arm64/constants-arm64.h"
14 #include "src/arm64/instructions-arm64.h"
15 #include "src/assembler.h"
16 #include "src/base/optional.h"
17 #include "src/globals.h"
18 #include "src/utils.h"
19 
20 
21 namespace v8 {
22 namespace internal {
23 
// -----------------------------------------------------------------------------
// Registers.
//
// X-macro lists: each macro expands its argument macro once per register, so
// register-indexed tables and constants can be generated mechanically.
// clang-format off
#define GENERAL_REGISTER_CODE_LIST(R)                     \
  R(0)  R(1)  R(2)  R(3)  R(4)  R(5)  R(6)  R(7)          \
  R(8)  R(9)  R(10) R(11) R(12) R(13) R(14) R(15)         \
  R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23)         \
  R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)

#define GENERAL_REGISTERS(R)                              \
  R(x0)  R(x1)  R(x2)  R(x3)  R(x4)  R(x5)  R(x6)  R(x7)  \
  R(x8)  R(x9)  R(x10) R(x11) R(x12) R(x13) R(x14) R(x15) \
  R(x16) R(x17) R(x18) R(x19) R(x20) R(x21) R(x22) R(x23) \
  R(x24) R(x25) R(x26) R(x27) R(x28) R(x29) R(x30) R(x31)

// Excludes x16/x17 (ip0/ip1), x26 (kRootRegister), x29 (fp), x30 (lr) and
// x31 (xzr/sp) -- see the ALIAS_REGISTER definitions below.
#define ALLOCATABLE_GENERAL_REGISTERS(R)                  \
  R(x0)  R(x1)  R(x2)  R(x3)  R(x4)  R(x5)  R(x6)  R(x7)  \
  R(x8)  R(x9)  R(x10) R(x11) R(x12) R(x13) R(x14) R(x15) \
  R(x18) R(x19) R(x20) R(x21) R(x22) R(x23) R(x24) R(x25) \
  R(x27) R(x28)

#define FLOAT_REGISTERS(V)                                \
  V(s0)  V(s1)  V(s2)  V(s3)  V(s4)  V(s5)  V(s6)  V(s7)  \
  V(s8)  V(s9)  V(s10) V(s11) V(s12) V(s13) V(s14) V(s15) \
  V(s16) V(s17) V(s18) V(s19) V(s20) V(s21) V(s22) V(s23) \
  V(s24) V(s25) V(s26) V(s27) V(s28) V(s29) V(s30) V(s31)

#define DOUBLE_REGISTERS(R)                               \
  R(d0)  R(d1)  R(d2)  R(d3)  R(d4)  R(d5)  R(d6)  R(d7)  \
  R(d8)  R(d9)  R(d10) R(d11) R(d12) R(d13) R(d14) R(d15) \
  R(d16) R(d17) R(d18) R(d19) R(d20) R(d21) R(d22) R(d23) \
  R(d24) R(d25) R(d26) R(d27) R(d28) R(d29) R(d30) R(d31)

#define SIMD128_REGISTERS(V)                              \
  V(q0)  V(q1)  V(q2)  V(q3)  V(q4)  V(q5)  V(q6)  V(q7)  \
  V(q8)  V(q9)  V(q10) V(q11) V(q12) V(q13) V(q14) V(q15) \
  V(q16) V(q17) V(q18) V(q19) V(q20) V(q21) V(q22) V(q23) \
  V(q24) V(q25) V(q26) V(q27) V(q28) V(q29) V(q30) V(q31)

// Register d29 could be allocated, but we keep an even length list here, in
// order to make stack alignment easier for save and restore.
#define ALLOCATABLE_DOUBLE_REGISTERS(R)                   \
  R(d0)  R(d1)  R(d2)  R(d3)  R(d4)  R(d5)  R(d6)  R(d7)  \
  R(d8)  R(d9)  R(d10) R(d11) R(d12) R(d13) R(d14) R(d16) \
  R(d17) R(d18) R(d19) R(d20) R(d21) R(d22) R(d23) R(d24) \
  R(d25) R(d26) R(d27) R(d28)
// clang-format on
72 constexpr int kRegListSizeInBits = sizeof(RegList) * kBitsPerByte;
73 
74 const int kNumRegs = kNumberOfRegisters;
75 // Registers x0-x17 are caller-saved.
76 const int kNumJSCallerSaved = 18;
77 const RegList kJSCallerSaved = 0x3ffff;
78 
79 // Number of registers for which space is reserved in safepoints. Must be a
80 // multiple of eight.
81 // TODO(all): Refine this number.
82 const int kNumSafepointRegisters = 32;
83 
84 // Define the list of registers actually saved at safepoints.
85 // Note that the number of saved registers may be smaller than the reserved
86 // space, i.e. kNumSafepointSavedRegisters <= kNumSafepointRegisters.
87 #define kSafepointSavedRegisters CPURegList::GetSafepointSavedRegisters().list()
88 #define kNumSafepointSavedRegisters \
89   CPURegList::GetSafepointSavedRegisters().Count()
90 
91 // Some CPURegister methods can return Register and VRegister types, so we
92 // need to declare them in advance.
93 class Register;
94 class VRegister;
95 
96 enum RegisterCode {
97 #define REGISTER_CODE(R) kRegCode_##R,
98   GENERAL_REGISTERS(REGISTER_CODE)
99 #undef REGISTER_CODE
100       kRegAfterLast
101 };
102 
103 class CPURegister : public RegisterBase<CPURegister, kRegAfterLast> {
104  public:
105   enum RegisterType {
106     kRegister,
107     kVRegister,
108     kNoRegister
109   };
110 
no_reg()111   static constexpr CPURegister no_reg() {
112     return CPURegister{0, 0, kNoRegister};
113   }
114 
115   template <int code, int size, RegisterType type>
Create()116   static constexpr CPURegister Create() {
117     static_assert(IsValid(code, size, type), "Cannot create invalid registers");
118     return CPURegister{code, size, type};
119   }
120 
Create(int code,int size,RegisterType type)121   static CPURegister Create(int code, int size, RegisterType type) {
122     DCHECK(IsValid(code, size, type));
123     return CPURegister{code, size, type};
124   }
125 
type()126   RegisterType type() const { return reg_type_; }
SizeInBits()127   int SizeInBits() const {
128     DCHECK(IsValid());
129     return reg_size_;
130   }
SizeInBytes()131   int SizeInBytes() const {
132     DCHECK(IsValid());
133     DCHECK_EQ(SizeInBits() % 8, 0);
134     return reg_size_ / 8;
135   }
Is8Bits()136   bool Is8Bits() const {
137     DCHECK(IsValid());
138     return reg_size_ == 8;
139   }
Is16Bits()140   bool Is16Bits() const {
141     DCHECK(IsValid());
142     return reg_size_ == 16;
143   }
Is32Bits()144   bool Is32Bits() const {
145     DCHECK(IsValid());
146     return reg_size_ == 32;
147   }
Is64Bits()148   bool Is64Bits() const {
149     DCHECK(IsValid());
150     return reg_size_ == 64;
151   }
Is128Bits()152   bool Is128Bits() const {
153     DCHECK(IsValid());
154     return reg_size_ == 128;
155   }
IsValid()156   bool IsValid() const { return reg_type_ != kNoRegister; }
IsNone()157   bool IsNone() const { return reg_type_ == kNoRegister; }
Is(const CPURegister & other)158   bool Is(const CPURegister& other) const {
159     return Aliases(other) && (reg_size_ == other.reg_size_);
160   }
Aliases(const CPURegister & other)161   bool Aliases(const CPURegister& other) const {
162     return (reg_code_ == other.reg_code_) && (reg_type_ == other.reg_type_);
163   }
164 
165   bool IsZero() const;
166   bool IsSP() const;
167 
IsRegister()168   bool IsRegister() const { return reg_type_ == kRegister; }
IsVRegister()169   bool IsVRegister() const { return reg_type_ == kVRegister; }
170 
IsFPRegister()171   bool IsFPRegister() const { return IsS() || IsD(); }
172 
IsW()173   bool IsW() const { return IsRegister() && Is32Bits(); }
IsX()174   bool IsX() const { return IsRegister() && Is64Bits(); }
175 
176   // These assertions ensure that the size and type of the register are as
177   // described. They do not consider the number of lanes that make up a vector.
178   // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
179   // does not imply Is1D() or Is8B().
180   // Check the number of lanes, ie. the format of the vector, using methods such
181   // as Is8B(), Is1D(), etc. in the VRegister class.
IsV()182   bool IsV() const { return IsVRegister(); }
IsB()183   bool IsB() const { return IsV() && Is8Bits(); }
IsH()184   bool IsH() const { return IsV() && Is16Bits(); }
IsS()185   bool IsS() const { return IsV() && Is32Bits(); }
IsD()186   bool IsD() const { return IsV() && Is64Bits(); }
IsQ()187   bool IsQ() const { return IsV() && Is128Bits(); }
188 
189   Register Reg() const;
190   VRegister VReg() const;
191 
192   Register X() const;
193   Register W() const;
194   VRegister V() const;
195   VRegister B() const;
196   VRegister H() const;
197   VRegister D() const;
198   VRegister S() const;
199   VRegister Q() const;
200 
201   bool IsSameSizeAndType(const CPURegister& other) const;
202 
is(const CPURegister & other)203   bool is(const CPURegister& other) const { return Is(other); }
is_valid()204   bool is_valid() const { return IsValid(); }
205 
206  protected:
207   int reg_size_;
208   RegisterType reg_type_;
209 
210   friend class RegisterBase;
211 
CPURegister(int code,int size,RegisterType type)212   constexpr CPURegister(int code, int size, RegisterType type)
213       : RegisterBase(code), reg_size_(size), reg_type_(type) {}
214 
IsValidRegister(int code,int size)215   static constexpr bool IsValidRegister(int code, int size) {
216     return (size == kWRegSizeInBits || size == kXRegSizeInBits) &&
217            (code < kNumberOfRegisters || code == kSPRegInternalCode);
218   }
219 
IsValidVRegister(int code,int size)220   static constexpr bool IsValidVRegister(int code, int size) {
221     return (size == kBRegSizeInBits || size == kHRegSizeInBits ||
222             size == kSRegSizeInBits || size == kDRegSizeInBits ||
223             size == kQRegSizeInBits) &&
224            code < kNumberOfVRegisters;
225   }
226 
IsValid(int code,int size,RegisterType type)227   static constexpr bool IsValid(int code, int size, RegisterType type) {
228     return (type == kRegister && IsValidRegister(code, size)) ||
229            (type == kVRegister && IsValidVRegister(code, size));
230   }
231 
IsNone(int code,int size,RegisterType type)232   static constexpr bool IsNone(int code, int size, RegisterType type) {
233     return type == kNoRegister && code == 0 && size == 0;
234   }
235 };
236 
237 ASSERT_TRIVIALLY_COPYABLE(CPURegister);
238 
239 class Register : public CPURegister {
240  public:
no_reg()241   static constexpr Register no_reg() { return Register(CPURegister::no_reg()); }
242 
243   template <int code, int size>
Create()244   static constexpr Register Create() {
245     return Register(CPURegister::Create<code, size, CPURegister::kRegister>());
246   }
247 
Create(int code,int size)248   static Register Create(int code, int size) {
249     return Register(CPURegister::Create(code, size, CPURegister::kRegister));
250   }
251 
252   static Register XRegFromCode(unsigned code);
253   static Register WRegFromCode(unsigned code);
254 
from_code(int code)255   static Register from_code(int code) {
256     // Always return an X register.
257     return Register::Create(code, kXRegSizeInBits);
258   }
259 
260   template <int code>
from_code()261   static Register from_code() {
262     // Always return an X register.
263     return Register::Create<code, kXRegSizeInBits>();
264   }
265 
266  private:
Register(const CPURegister & r)267   constexpr explicit Register(const CPURegister& r) : CPURegister(r) {}
268 };
269 
270 ASSERT_TRIVIALLY_COPYABLE(Register);
271 
272 constexpr bool kPadArguments = true;
273 constexpr bool kSimpleFPAliasing = true;
274 constexpr bool kSimdMaskRegisters = false;
275 
276 enum DoubleRegisterCode {
277 #define REGISTER_CODE(R) kDoubleCode_##R,
278   DOUBLE_REGISTERS(REGISTER_CODE)
279 #undef REGISTER_CODE
280       kDoubleAfterLast
281 };
282 
283 class VRegister : public CPURegister {
284  public:
no_reg()285   static constexpr VRegister no_reg() {
286     return VRegister(CPURegister::no_reg(), 0);
287   }
288 
289   template <int code, int size, int lane_count = 1>
Create()290   static constexpr VRegister Create() {
291     static_assert(IsValidLaneCount(lane_count), "Invalid lane count");
292     return VRegister(CPURegister::Create<code, size, kVRegister>(), lane_count);
293   }
294 
295   static VRegister Create(int code, int size, int lane_count = 1) {
296     DCHECK(IsValidLaneCount(lane_count));
297     return VRegister(CPURegister::Create(code, size, CPURegister::kVRegister),
298                      lane_count);
299   }
300 
Create(int reg_code,VectorFormat format)301   static VRegister Create(int reg_code, VectorFormat format) {
302     int reg_size = RegisterSizeInBitsFromFormat(format);
303     int reg_count = IsVectorFormat(format) ? LaneCountFromFormat(format) : 1;
304     return VRegister::Create(reg_code, reg_size, reg_count);
305   }
306 
307   static VRegister BRegFromCode(unsigned code);
308   static VRegister HRegFromCode(unsigned code);
309   static VRegister SRegFromCode(unsigned code);
310   static VRegister DRegFromCode(unsigned code);
311   static VRegister QRegFromCode(unsigned code);
312   static VRegister VRegFromCode(unsigned code);
313 
V8B()314   VRegister V8B() const {
315     return VRegister::Create(code(), kDRegSizeInBits, 8);
316   }
V16B()317   VRegister V16B() const {
318     return VRegister::Create(code(), kQRegSizeInBits, 16);
319   }
V4H()320   VRegister V4H() const {
321     return VRegister::Create(code(), kDRegSizeInBits, 4);
322   }
V8H()323   VRegister V8H() const {
324     return VRegister::Create(code(), kQRegSizeInBits, 8);
325   }
V2S()326   VRegister V2S() const {
327     return VRegister::Create(code(), kDRegSizeInBits, 2);
328   }
V4S()329   VRegister V4S() const {
330     return VRegister::Create(code(), kQRegSizeInBits, 4);
331   }
V2D()332   VRegister V2D() const {
333     return VRegister::Create(code(), kQRegSizeInBits, 2);
334   }
V1D()335   VRegister V1D() const {
336     return VRegister::Create(code(), kDRegSizeInBits, 1);
337   }
338 
Is8B()339   bool Is8B() const { return (Is64Bits() && (lane_count_ == 8)); }
Is16B()340   bool Is16B() const { return (Is128Bits() && (lane_count_ == 16)); }
Is4H()341   bool Is4H() const { return (Is64Bits() && (lane_count_ == 4)); }
Is8H()342   bool Is8H() const { return (Is128Bits() && (lane_count_ == 8)); }
Is2S()343   bool Is2S() const { return (Is64Bits() && (lane_count_ == 2)); }
Is4S()344   bool Is4S() const { return (Is128Bits() && (lane_count_ == 4)); }
Is1D()345   bool Is1D() const { return (Is64Bits() && (lane_count_ == 1)); }
Is2D()346   bool Is2D() const { return (Is128Bits() && (lane_count_ == 2)); }
347 
348   // For consistency, we assert the number of lanes of these scalar registers,
349   // even though there are no vectors of equivalent total size with which they
350   // could alias.
Is1B()351   bool Is1B() const {
352     DCHECK(!(Is8Bits() && IsVector()));
353     return Is8Bits();
354   }
Is1H()355   bool Is1H() const {
356     DCHECK(!(Is16Bits() && IsVector()));
357     return Is16Bits();
358   }
Is1S()359   bool Is1S() const {
360     DCHECK(!(Is32Bits() && IsVector()));
361     return Is32Bits();
362   }
363 
IsLaneSizeB()364   bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSizeInBits; }
IsLaneSizeH()365   bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSizeInBits; }
IsLaneSizeS()366   bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSizeInBits; }
IsLaneSizeD()367   bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSizeInBits; }
368 
IsScalar()369   bool IsScalar() const { return lane_count_ == 1; }
IsVector()370   bool IsVector() const { return lane_count_ > 1; }
371 
IsSameFormat(const VRegister & other)372   bool IsSameFormat(const VRegister& other) const {
373     return (reg_size_ == other.reg_size_) && (lane_count_ == other.lane_count_);
374   }
375 
LaneCount()376   int LaneCount() const { return lane_count_; }
377 
LaneSizeInBytes()378   unsigned LaneSizeInBytes() const { return SizeInBytes() / lane_count_; }
379 
LaneSizeInBits()380   unsigned LaneSizeInBits() const { return LaneSizeInBytes() * 8; }
381 
382   static constexpr int kMaxNumRegisters = kNumberOfVRegisters;
383   STATIC_ASSERT(kMaxNumRegisters == kDoubleAfterLast);
384 
from_code(int code)385   static VRegister from_code(int code) {
386     // Always return a D register.
387     return VRegister::Create(code, kDRegSizeInBits);
388   }
389 
390  private:
391   int lane_count_;
392 
VRegister(const CPURegister & r,int lane_count)393   constexpr explicit VRegister(const CPURegister& r, int lane_count)
394       : CPURegister(r), lane_count_(lane_count) {}
395 
IsValidLaneCount(int lane_count)396   static constexpr bool IsValidLaneCount(int lane_count) {
397     return base::bits::IsPowerOfTwo(lane_count) && lane_count <= 16;
398   }
399 };
400 
401 ASSERT_TRIVIALLY_COPYABLE(VRegister);
402 
403 // No*Reg is used to indicate an unused argument, or an error case. Note that
404 // these all compare equal (using the Is() method). The Register and VRegister
405 // variants are provided for convenience.
406 constexpr Register NoReg = Register::no_reg();
407 constexpr VRegister NoVReg = VRegister::no_reg();
408 constexpr CPURegister NoCPUReg = CPURegister::no_reg();
409 constexpr Register no_reg = NoReg;
410 constexpr VRegister no_dreg = NoVReg;
411 
412 #define DEFINE_REGISTER(register_class, name, ...) \
413   constexpr register_class name = register_class::Create<__VA_ARGS__>()
414 #define ALIAS_REGISTER(register_class, alias, name) \
415   constexpr register_class alias = name
416 
417 #define DEFINE_REGISTERS(N)                            \
418   DEFINE_REGISTER(Register, w##N, N, kWRegSizeInBits); \
419   DEFINE_REGISTER(Register, x##N, N, kXRegSizeInBits);
420 GENERAL_REGISTER_CODE_LIST(DEFINE_REGISTERS)
421 #undef DEFINE_REGISTERS
422 
423 DEFINE_REGISTER(Register, wsp, kSPRegInternalCode, kWRegSizeInBits);
424 DEFINE_REGISTER(Register, sp, kSPRegInternalCode, kXRegSizeInBits);
425 
426 #define DEFINE_VREGISTERS(N)                            \
427   DEFINE_REGISTER(VRegister, b##N, N, kBRegSizeInBits); \
428   DEFINE_REGISTER(VRegister, h##N, N, kHRegSizeInBits); \
429   DEFINE_REGISTER(VRegister, s##N, N, kSRegSizeInBits); \
430   DEFINE_REGISTER(VRegister, d##N, N, kDRegSizeInBits); \
431   DEFINE_REGISTER(VRegister, q##N, N, kQRegSizeInBits); \
432   DEFINE_REGISTER(VRegister, v##N, N, kQRegSizeInBits);
433 GENERAL_REGISTER_CODE_LIST(DEFINE_VREGISTERS)
434 #undef DEFINE_VREGISTERS
435 
436 #undef DEFINE_REGISTER
437 
438 // Registers aliases.
439 ALIAS_REGISTER(VRegister, v8_, v8);  // Avoid conflicts with namespace v8.
440 ALIAS_REGISTER(Register, ip0, x16);
441 ALIAS_REGISTER(Register, ip1, x17);
442 ALIAS_REGISTER(Register, wip0, w16);
443 ALIAS_REGISTER(Register, wip1, w17);
444 // Root register.
445 ALIAS_REGISTER(Register, kRootRegister, x26);
446 ALIAS_REGISTER(Register, rr, x26);
447 // Context pointer register.
448 ALIAS_REGISTER(Register, cp, x27);
449 ALIAS_REGISTER(Register, fp, x29);
450 ALIAS_REGISTER(Register, lr, x30);
451 ALIAS_REGISTER(Register, xzr, x31);
452 ALIAS_REGISTER(Register, wzr, w31);
453 
454 // Register used for padding stack slots.
455 ALIAS_REGISTER(Register, padreg, x31);
456 
457 // Keeps the 0 double value.
458 ALIAS_REGISTER(VRegister, fp_zero, d15);
459 // MacroAssembler fixed V Registers.
460 ALIAS_REGISTER(VRegister, fp_fixed1, d28);
461 ALIAS_REGISTER(VRegister, fp_fixed2, d29);
462 
463 // MacroAssembler scratch V registers.
464 ALIAS_REGISTER(VRegister, fp_scratch, d30);
465 ALIAS_REGISTER(VRegister, fp_scratch1, d30);
466 ALIAS_REGISTER(VRegister, fp_scratch2, d31);
467 
468 #undef ALIAS_REGISTER
469 
470 // AreAliased returns true if any of the named registers overlap. Arguments set
471 // to NoReg are ignored. The system stack pointer may be specified.
472 bool AreAliased(const CPURegister& reg1,
473                 const CPURegister& reg2,
474                 const CPURegister& reg3 = NoReg,
475                 const CPURegister& reg4 = NoReg,
476                 const CPURegister& reg5 = NoReg,
477                 const CPURegister& reg6 = NoReg,
478                 const CPURegister& reg7 = NoReg,
479                 const CPURegister& reg8 = NoReg);
480 
481 // AreSameSizeAndType returns true if all of the specified registers have the
482 // same size, and are of the same type. The system stack pointer may be
483 // specified. Arguments set to NoReg are ignored, as are any subsequent
484 // arguments. At least one argument (reg1) must be valid (not NoCPUReg).
485 bool AreSameSizeAndType(
486     const CPURegister& reg1, const CPURegister& reg2 = NoCPUReg,
487     const CPURegister& reg3 = NoCPUReg, const CPURegister& reg4 = NoCPUReg,
488     const CPURegister& reg5 = NoCPUReg, const CPURegister& reg6 = NoCPUReg,
489     const CPURegister& reg7 = NoCPUReg, const CPURegister& reg8 = NoCPUReg);
490 
491 // AreSameFormat returns true if all of the specified VRegisters have the same
492 // vector format. Arguments set to NoVReg are ignored, as are any subsequent
493 // arguments. At least one argument (reg1) must be valid (not NoVReg).
494 bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
495                    const VRegister& reg3 = NoVReg,
496                    const VRegister& reg4 = NoVReg);
497 
498 // AreConsecutive returns true if all of the specified VRegisters are
499 // consecutive in the register file. Arguments may be set to NoVReg, and if so,
500 // subsequent arguments must also be NoVReg. At least one argument (reg1) must
501 // be valid (not NoVReg).
502 bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
503                     const VRegister& reg3 = NoVReg,
504                     const VRegister& reg4 = NoVReg);
505 
506 typedef VRegister FloatRegister;
507 typedef VRegister DoubleRegister;
508 typedef VRegister Simd128Register;
509 
510 // -----------------------------------------------------------------------------
511 // Lists of registers.
512 class CPURegList {
513  public:
514   template <typename... CPURegisters>
CPURegList(CPURegister reg0,CPURegisters...regs)515   explicit CPURegList(CPURegister reg0, CPURegisters... regs)
516       : list_(CPURegister::ListOf(reg0, regs...)),
517         size_(reg0.SizeInBits()),
518         type_(reg0.type()) {
519     DCHECK(AreSameSizeAndType(reg0, regs...));
520     DCHECK(IsValid());
521   }
522 
CPURegList(CPURegister::RegisterType type,int size,RegList list)523   CPURegList(CPURegister::RegisterType type, int size, RegList list)
524       : list_(list), size_(size), type_(type) {
525     DCHECK(IsValid());
526   }
527 
CPURegList(CPURegister::RegisterType type,int size,int first_reg,int last_reg)528   CPURegList(CPURegister::RegisterType type, int size, int first_reg,
529              int last_reg)
530       : size_(size), type_(type) {
531     DCHECK(
532         ((type == CPURegister::kRegister) && (last_reg < kNumberOfRegisters)) ||
533         ((type == CPURegister::kVRegister) &&
534          (last_reg < kNumberOfVRegisters)));
535     DCHECK(last_reg >= first_reg);
536     list_ = (1UL << (last_reg + 1)) - 1;
537     list_ &= ~((1UL << first_reg) - 1);
538     DCHECK(IsValid());
539   }
540 
type()541   CPURegister::RegisterType type() const {
542     DCHECK(IsValid());
543     return type_;
544   }
545 
list()546   RegList list() const {
547     DCHECK(IsValid());
548     return list_;
549   }
550 
set_list(RegList new_list)551   inline void set_list(RegList new_list) {
552     DCHECK(IsValid());
553     list_ = new_list;
554   }
555 
556   // Combine another CPURegList into this one. Registers that already exist in
557   // this list are left unchanged. The type and size of the registers in the
558   // 'other' list must match those in this list.
559   void Combine(const CPURegList& other);
560 
561   // Remove every register in the other CPURegList from this one. Registers that
562   // do not exist in this list are ignored. The type of the registers in the
563   // 'other' list must match those in this list.
564   void Remove(const CPURegList& other);
565 
566   // Variants of Combine and Remove which take CPURegisters.
567   void Combine(const CPURegister& other);
568   void Remove(const CPURegister& other1,
569               const CPURegister& other2 = NoCPUReg,
570               const CPURegister& other3 = NoCPUReg,
571               const CPURegister& other4 = NoCPUReg);
572 
573   // Variants of Combine and Remove which take a single register by its code;
574   // the type and size of the register is inferred from this list.
575   void Combine(int code);
576   void Remove(int code);
577 
578   // Remove all callee-saved registers from the list. This can be useful when
579   // preparing registers for an AAPCS64 function call, for example.
580   void RemoveCalleeSaved();
581 
582   CPURegister PopLowestIndex();
583   CPURegister PopHighestIndex();
584 
585   // AAPCS64 callee-saved registers.
586   static CPURegList GetCalleeSaved(int size = kXRegSizeInBits);
587   static CPURegList GetCalleeSavedV(int size = kDRegSizeInBits);
588 
589   // AAPCS64 caller-saved registers. Note that this includes lr.
590   // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
591   // 64-bits being caller-saved.
592   static CPURegList GetCallerSaved(int size = kXRegSizeInBits);
593   static CPURegList GetCallerSavedV(int size = kDRegSizeInBits);
594 
595   // Registers saved as safepoints.
596   static CPURegList GetSafepointSavedRegisters();
597 
IsEmpty()598   bool IsEmpty() const {
599     DCHECK(IsValid());
600     return list_ == 0;
601   }
602 
603   bool IncludesAliasOf(const CPURegister& other1,
604                        const CPURegister& other2 = NoCPUReg,
605                        const CPURegister& other3 = NoCPUReg,
606                        const CPURegister& other4 = NoCPUReg) const {
607     DCHECK(IsValid());
608     RegList list = 0;
609     if (!other1.IsNone() && (other1.type() == type_)) list |= other1.bit();
610     if (!other2.IsNone() && (other2.type() == type_)) list |= other2.bit();
611     if (!other3.IsNone() && (other3.type() == type_)) list |= other3.bit();
612     if (!other4.IsNone() && (other4.type() == type_)) list |= other4.bit();
613     return (list_ & list) != 0;
614   }
615 
Count()616   int Count() const {
617     DCHECK(IsValid());
618     return CountSetBits(list_, kRegListSizeInBits);
619   }
620 
RegisterSizeInBits()621   int RegisterSizeInBits() const {
622     DCHECK(IsValid());
623     return size_;
624   }
625 
RegisterSizeInBytes()626   int RegisterSizeInBytes() const {
627     int size_in_bits = RegisterSizeInBits();
628     DCHECK_EQ(size_in_bits % kBitsPerByte, 0);
629     return size_in_bits / kBitsPerByte;
630   }
631 
TotalSizeInBytes()632   int TotalSizeInBytes() const {
633     DCHECK(IsValid());
634     return RegisterSizeInBytes() * Count();
635   }
636 
637  private:
638   RegList list_;
639   int size_;
640   CPURegister::RegisterType type_;
641 
IsValid()642   bool IsValid() const {
643     constexpr RegList kValidRegisters{0x8000000ffffffff};
644     constexpr RegList kValidVRegisters{0x0000000ffffffff};
645     switch (type_) {
646       case CPURegister::kRegister:
647         return (list_ & kValidRegisters) == list_;
648       case CPURegister::kVRegister:
649         return (list_ & kValidVRegisters) == list_;
650       case CPURegister::kNoRegister:
651         return list_ == 0;
652       default:
653         UNREACHABLE();
654     }
655   }
656 };
657 
658 
659 // AAPCS64 callee-saved registers.
660 #define kCalleeSaved CPURegList::GetCalleeSaved()
661 #define kCalleeSavedV CPURegList::GetCalleeSavedV()
662 
663 // AAPCS64 caller-saved registers. Note that this includes lr.
664 #define kCallerSaved CPURegList::GetCallerSaved()
665 #define kCallerSavedV CPURegList::GetCallerSavedV()
666 
667 // -----------------------------------------------------------------------------
668 // Immediates.
669 class Immediate {
670  public:
671   template<typename T>
672   inline explicit Immediate(Handle<T> handle);
673 
674   // This is allowed to be an implicit constructor because Immediate is
675   // a wrapper class that doesn't normally perform any type conversion.
676   template<typename T>
677   inline Immediate(T value);  // NOLINT(runtime/explicit)
678 
679   template<typename T>
680   inline Immediate(T value, RelocInfo::Mode rmode);
681 
value()682   int64_t value() const { return value_; }
rmode()683   RelocInfo::Mode rmode() const { return rmode_; }
684 
685  private:
686   void InitializeHandle(Handle<HeapObject> value);
687 
688   int64_t value_;
689   RelocInfo::Mode rmode_;
690 };
691 
692 
693 // -----------------------------------------------------------------------------
694 // Operands.
695 constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize;
696 constexpr uint64_t kSmiShiftMask = (1UL << kSmiShift) - 1;
697 
698 // Represents an operand in a machine instruction.
699 class Operand {
700   // TODO(all): If necessary, study more in details which methods
701   // TODO(all): should be inlined or not.
702  public:
703   // rm, {<shift> {#<shift_amount>}}
704   // where <shift> is one of {LSL, LSR, ASR, ROR}.
705   //       <shift_amount> is uint6_t.
706   // This is allowed to be an implicit constructor because Operand is
707   // a wrapper class that doesn't normally perform any type conversion.
708   inline Operand(Register reg,
709                  Shift shift = LSL,
710                  unsigned shift_amount = 0);  // NOLINT(runtime/explicit)
711 
712   // rm, <extend> {#<shift_amount>}
713   // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
714   //       <shift_amount> is uint2_t.
715   inline Operand(Register reg,
716                  Extend extend,
717                  unsigned shift_amount = 0);
718 
719   static Operand EmbeddedNumber(double number);  // Smi or HeapNumber.
720   static Operand EmbeddedCode(CodeStub* stub);
721 
722   inline bool IsHeapObjectRequest() const;
723   inline HeapObjectRequest heap_object_request() const;
724   inline Immediate immediate_for_heap_object_request() const;
725 
726   template<typename T>
727   inline explicit Operand(Handle<T> handle);
728 
729   // Implicit constructor for all int types, ExternalReference, and Smi.
730   template<typename T>
731   inline Operand(T t);  // NOLINT(runtime/explicit)
732 
733   // Implicit constructor for int types.
734   template<typename T>
735   inline Operand(T t, RelocInfo::Mode rmode);
736 
737   inline bool IsImmediate() const;
738   inline bool IsShiftedRegister() const;
739   inline bool IsExtendedRegister() const;
740   inline bool IsZero() const;
741 
742   // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
743   // which helps in the encoding of instructions that use the stack pointer.
744   inline Operand ToExtendedRegister() const;
745 
746   inline Immediate immediate() const;
747   inline int64_t ImmediateValue() const;
748   inline RelocInfo::Mode ImmediateRMode() const;
749   inline Register reg() const;
750   inline Shift shift() const;
751   inline Extend extend() const;
752   inline unsigned shift_amount() const;
753 
754   // Relocation information.
755   bool NeedsRelocation(const Assembler* assembler) const;
756 
757   // Helpers
758   inline static Operand UntagSmi(Register smi);
759   inline static Operand UntagSmiAndScale(Register smi, int scale);
760 
761  private:
762   base::Optional<HeapObjectRequest> heap_object_request_;
763   Immediate immediate_;
764   Register reg_;
765   Shift shift_;
766   Extend extend_;
767   unsigned shift_amount_;
768 };
769 
770 
// MemOperand represents a memory operand in a load or store instruction.
class MemOperand {
 public:
  inline MemOperand();
  // Immediate-offset form: [base, #offset], with optional pre/post-indexing
  // selected by 'addrmode'.
  inline explicit MemOperand(Register base,
                             int64_t offset = 0,
                             AddrMode addrmode = Offset);
  // Register-offset form with an optional shift: [base, regoffset {, shift}].
  inline explicit MemOperand(Register base,
                             Register regoffset,
                             Shift shift = LSL,
                             unsigned shift_amount = 0);
  // Register-offset form with an extend: [base, regoffset, extend {#amount}].
  inline explicit MemOperand(Register base,
                             Register regoffset,
                             Extend extend,
                             unsigned shift_amount = 0);
  // Generic form: 'offset' may be an immediate, shifted register or extended
  // register Operand.
  inline explicit MemOperand(Register base,
                             const Operand& offset,
                             AddrMode addrmode = Offset);

  // Trivial accessors for the operand components.
  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
  unsigned shift_amount() const { return shift_amount_; }
  // Addressing-mode predicates.
  inline bool IsImmediateOffset() const;
  inline bool IsRegisterOffset() const;
  inline bool IsPreIndex() const;
  inline bool IsPostIndex() const;

  // For offset modes, return the offset as an Operand. This helper cannot
  // handle indexed modes.
  inline Operand OffsetAsOperand() const;

  enum PairResult {
    kNotPair,   // Can't use a pair instruction.
    kPairAB,    // Can use a pair instruction (operandA has lower address).
    kPairBA     // Can use a pair instruction (operandB has lower address).
  };
  // Check if two MemOperand are consistent for stp/ldp use.
  static PairResult AreConsistentForPair(const MemOperand& operandA,
                                         const MemOperand& operandB,
                                         int access_size_log2 = kXRegSizeLog2);

 private:
  Register base_;
  Register regoffset_;
  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
825 
826 
// Tracks pending 64-bit constant pool entries for the Assembler and emits
// them as a literal pool on request.
class ConstPool {
 public:
  explicit ConstPool(Assembler* assm) : assm_(assm), first_use_(-1) {}
  // Returns true when we need to write RelocInfo and false when we do not.
  bool RecordEntry(intptr_t data, RelocInfo::Mode mode);
  // Number of pending entries; shared (deduplicated) values count once.
  int EntryCount() const { return static_cast<int>(entries_.size()); }
  bool IsEmpty() const { return entries_.empty(); }
  // Distance in bytes between the current pc and the first instruction
  // using the pool. If there are no pending entries return kMaxInt.
  int DistanceToFirstUse();
  // Offset after which instructions using the pool will be out of range.
  int MaxPcOffset();
  // Maximum size the constant pool can be with current entries. It always
  // includes alignment padding and branch over.
  int WorstCaseSize();
  // Size in bytes of the literal pool *if* it is emitted at the current
  // pc. The size will include the branch over the pool if it was requested.
  int SizeIfEmittedAtCurrentPc(bool require_jump);
  // Emit the literal pool at the current pc with a branch over the pool if
  // requested.
  void Emit(bool require_jump);
  // Discard any pending pool entries.
  void Clear();

 private:
  void EmitMarker();
  void EmitGuard();
  void EmitEntries();

  typedef std::map<uint64_t, int> SharedEntryMap;
  // Adds a shared entry to entries_, using 'entry_map' to determine whether we
  // already track this entry. Returns true if this is the first time we add
  // this entry, false otherwise.
  bool AddSharedEntry(SharedEntryMap& entry_map, uint64_t data, int offset);

  Assembler* assm_;
  // Keep track of the first instruction requiring a constant pool entry
  // since the previous constant pool was emitted. -1 when there is no
  // pending use (see the constructor initializer).
  int first_use_;

  // Map of data to index in entries_ for shared entries.
  SharedEntryMap shared_entries_;

  // Map of address of handle to index in entries_. We need to keep track of
  // code targets separately from other shared entries, as they can be
  // relocated.
  SharedEntryMap handle_to_index_map_;

  // Values, pc offset(s) of entries. Use a vector to preserve the order of
  // insertion, as the serializer expects code target RelocInfo to point to
  // constant pool addresses in an ascending order.
  std::vector<std::pair<uint64_t, std::vector<int> > > entries_;
};
880 
881 
882 // -----------------------------------------------------------------------------
883 // Assembler.
884 
885 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
886  public:
887   // Create an assembler. Instructions and relocation information are emitted
888   // into a buffer, with the instructions starting from the beginning and the
889   // relocation information starting from the end of the buffer. See CodeDesc
890   // for a detailed comment on the layout (globals.h).
891   //
892   // If the provided buffer is nullptr, the assembler allocates and grows its
893   // own buffer, and buffer_size determines the initial buffer size. The buffer
894   // is owned by the assembler and deallocated upon destruction of the
895   // assembler.
896   //
897   // If the provided buffer is not nullptr, the assembler uses the provided
898   // buffer for code generation and assumes its size to be buffer_size. If the
899   // buffer is too small, a fatal error occurs. No deallocation of the buffer is
900   // done upon destruction of the assembler.
901   Assembler(const AssemblerOptions& options, void* buffer, int buffer_size);
902 
903   virtual ~Assembler();
904 
  virtual void AbortedCodeGeneration() {
    // Code generation is being abandoned; discard any pending constant pool
    // entries so they are not carried over or emitted into dead code.
    constpool_.Clear();
  }
908 
909   // System functions ---------------------------------------------------------
910   // Start generating code from the beginning of the buffer, discarding any code
911   // and data that has already been emitted into the buffer.
912   //
913   // In order to avoid any accidental transfer of state, Reset DCHECKs that the
914   // constant pool is not blocked.
915   void Reset();
916 
917   // GetCode emits any pending (non-emitted) code and fills the descriptor
918   // desc. GetCode() is idempotent; it returns the same result if no other
919   // Assembler functions are invoked in between GetCode() calls.
920   //
921   // The descriptor (desc) can be nullptr. In that case, the code is finalized
922   // as usual, but the descriptor is not populated.
923   void GetCode(Isolate* isolate, CodeDesc* desc);
924 
925   // Insert the smallest number of nop instructions
926   // possible to align the pc offset to a multiple
927   // of m. m must be a power of 2 (>= 4).
928   void Align(int m);
929   // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
931   void DataAlign(int m);
932 
933   inline void Unreachable();
934 
935   // Label --------------------------------------------------------------------
936   // Bind a label to the current pc. Note that labels can only be bound once,
937   // and if labels are linked to other instructions, they _must_ be bound
938   // before they go out of scope.
939   void bind(Label* label);
940 
941 
942   // RelocInfo and pools ------------------------------------------------------
943 
944   // Record relocation information for current pc_.
945   enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY };
946   void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0,
947                        ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY);
948 
949   // Generate a B immediate instruction with the corresponding relocation info.
950   // 'offset' is the immediate to encode in the B instruction (so it is the
951   // difference between the target and the PC of the instruction, divided by
952   // the instruction size).
953   void near_jump(int offset, RelocInfo::Mode rmode);
954   // Generate a BL immediate instruction with the corresponding relocation info.
955   // As for near_jump, 'offset' is the immediate to encode in the BL
956   // instruction.
957   void near_call(int offset, RelocInfo::Mode rmode);
958   // Generate a BL immediate instruction with the corresponding relocation info
959   // for the input HeapObjectRequest.
960   void near_call(HeapObjectRequest request);
961 
962   // Return the address in the constant pool of the code target address used by
963   // the branch/call instruction at pc.
964   inline static Address target_pointer_address_at(Address pc);
965 
966   // Read/Modify the code target address in the branch/call instruction at pc.
967   // The isolate argument is unused (and may be nullptr) when skipping flushing.
968   inline static Address target_address_at(Address pc, Address constant_pool);
969   inline static void set_target_address_at(
970       Address pc, Address constant_pool, Address target,
971       ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
972 
973   // Returns the handle for the code object called at 'pc'.
974   // This might need to be temporarily encoded as an offset into code_targets_.
975   inline Handle<Code> code_target_object_handle_at(Address pc);
976 
977   // Returns the target address for a runtime function for the call encoded
978   // at 'pc'.
979   // Runtime entries can be temporarily encoded as the offset between the
980   // runtime function entrypoint and the code range start (stored in the
981   // code_range_start field), in order to be encodable as we generate the code,
982   // before it is moved into the code space.
983   inline Address runtime_entry_at(Address pc);
984 
985   // Return the code target address at a call site from the return address of
986   // that call in the instruction stream.
987   inline static Address target_address_from_return_address(Address pc);
988 
989   // This sets the branch destination. 'location' here can be either the pc of
990   // an immediate branch or the address of an entry in the constant pool.
991   // This is for calls and branches within generated code.
992   inline static void deserialization_set_special_target_at(Address location,
993                                                            Code* code,
994                                                            Address target);
995 
996   // Get the size of the special target encoded at 'location'.
997   inline static int deserialization_special_target_size(Address location);
998 
999   // This sets the internal reference at the pc.
1000   inline static void deserialization_set_target_internal_reference_at(
1001       Address pc, Address target,
1002       RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
1003 
1004   // This value is used in the serialization process and must be zero for
1005   // ARM64, as the code target is split across multiple instructions and does
1006   // not exist separately in the code, so the serializer should not step
1007   // forwards in memory after a target is resolved and written.
1008   static constexpr int kSpecialTargetSize = 0;
1009 
1010   // The sizes of the call sequences emitted by MacroAssembler::Call.
1011   //
1012   // A "near" call is encoded in a BL immediate instruction:
1013   //  bl target
1014   //
1015   // whereas a "far" call will be encoded like this:
1016   //  ldr temp, =target
1017   //  blr temp
1018   static constexpr int kNearCallSize = 1 * kInstrSize;
1019   static constexpr int kFarCallSize = 2 * kInstrSize;
1020 
1021   // Size of the generated code in bytes
SizeOfGeneratedCode()1022   uint64_t SizeOfGeneratedCode() const {
1023     DCHECK((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
1024     return pc_ - buffer_;
1025   }
1026 
1027   // Return the code size generated from label to the current position.
SizeOfCodeGeneratedSince(const Label * label)1028   uint64_t SizeOfCodeGeneratedSince(const Label* label) {
1029     DCHECK(label->is_bound());
1030     DCHECK(pc_offset() >= label->pos());
1031     DCHECK(pc_offset() < buffer_size_);
1032     return pc_offset() - label->pos();
1033   }
1034 
1035   // Return the number of instructions generated from label to the
1036   // current position.
  uint64_t InstructionsGeneratedSince(const Label* label) {
    // ARM64 instructions have a fixed width of kInstrSize bytes, so the
    // byte distance divides evenly into an instruction count.
    return SizeOfCodeGeneratedSince(label) / kInstrSize;
  }
1040 
  // Prevent constant pool emission until EndBlockConstPool is called.
  // Calls to this function can be nested but must be followed by an equal
  // number of calls to EndBlockConstPool.
1044   void StartBlockConstPool();
1045 
  // Resume constant pool emission. Needs to be called as many times as
  // StartBlockConstPool to have an effect.
1048   void EndBlockConstPool();
1049 
1050   bool is_const_pool_blocked() const;
1051   static bool IsConstantPoolAt(Instruction* instr);
1052   static int ConstantPoolSizeAt(Instruction* instr);
1053   // See Assembler::CheckConstPool for more info.
1054   void EmitPoolGuard();
1055 
  // Prevent veneer pool emission until EndBlockVeneerPool is called.
  // Calls to this function can be nested but must be followed by an equal
  // number of calls to EndBlockVeneerPool.
1059   void StartBlockVeneerPool();
1060 
  // Resume veneer pool emission. Needs to be called as many times as
  // StartBlockVeneerPool to have an effect.
1063   void EndBlockVeneerPool();
1064 
  bool is_veneer_pool_blocked() const {
    // Veneer pool emission is blocked while the nesting count is positive
    // (see StartBlockVeneerPool / EndBlockVeneerPool).
    return veneer_pool_blocked_nesting_ > 0;
  }
1068 
1069   // Block/resume emission of constant pools and veneer pools.
  void StartBlockPools() {
    // Convenience wrapper: block both the constant and veneer pools at once.
    StartBlockConstPool();
    StartBlockVeneerPool();
  }
  void EndBlockPools() {
    // Convenience wrapper: undo one nesting level of StartBlockPools.
    EndBlockConstPool();
    EndBlockVeneerPool();
  }
1078 
1079   // Debugging ----------------------------------------------------------------
1080   void RecordComment(const char* msg);
1081 
1082   // Record a deoptimization reason that can be used by a log or cpu profiler.
1083   // Use --trace-deopt to enable.
1084   void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
1085                          int id);
1086 
1087   int buffer_space() const;
1088 
1089   // Record the emission of a constant pool.
1090   //
1091   // The emission of constant and veneer pools depends on the size of the code
1092   // generated and the number of RelocInfo recorded.
1093   // The Debug mechanism needs to map code offsets between two versions of a
1094   // function, compiled with and without debugger support (see for example
1095   // Debug::PrepareForBreakPoints()).
1096   // Compiling functions with debugger support generates additional code
1097   // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools
1098   // and cause the version of the code with debugger support to have pools
1099   // generated in different places.
1100   // Recording the position and size of emitted pools allows to correctly
1101   // compute the offset mappings between the different versions of a function in
1102   // all situations.
1103   //
1104   // The parameter indicates the size of the pool (in bytes), including
1105   // the marker and branch over the data.
1106   void RecordConstPool(int size);
1107 
1108   // Instruction set functions ------------------------------------------------
1109 
1110   // Branch / Jump instructions.
  // For branches, offsets are scaled, i.e. they are in instructions, not in
  // bytes.
1112   // Branch to register.
1113   void br(const Register& xn);
1114 
1115   // Branch-link to register.
1116   void blr(const Register& xn);
1117 
1118   // Branch to register with return hint.
1119   void ret(const Register& xn = lr);
1120 
1121   // Unconditional branch to label.
1122   void b(Label* label);
1123 
1124   // Conditional branch to label.
1125   void b(Label* label, Condition cond);
1126 
1127   // Unconditional branch to PC offset.
1128   void b(int imm26);
1129 
1130   // Conditional branch to PC offset.
1131   void b(int imm19, Condition cond);
1132 
1133   // Branch-link to label / pc offset.
1134   void bl(Label* label);
1135   void bl(int imm26);
1136 
1137   // Compare and branch to label / pc offset if zero.
1138   void cbz(const Register& rt, Label* label);
1139   void cbz(const Register& rt, int imm19);
1140 
1141   // Compare and branch to label / pc offset if not zero.
1142   void cbnz(const Register& rt, Label* label);
1143   void cbnz(const Register& rt, int imm19);
1144 
1145   // Test bit and branch to label / pc offset if zero.
1146   void tbz(const Register& rt, unsigned bit_pos, Label* label);
1147   void tbz(const Register& rt, unsigned bit_pos, int imm14);
1148 
1149   // Test bit and branch to label / pc offset if not zero.
1150   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
1151   void tbnz(const Register& rt, unsigned bit_pos, int imm14);
1152 
1153   // Address calculation instructions.
1154   // Calculate a PC-relative address. Unlike for branches the offset in adr is
1155   // unscaled (i.e. the result can be unaligned).
1156   void adr(const Register& rd, Label* label);
1157   void adr(const Register& rd, int imm21);
1158 
1159   // Data Processing instructions.
1160   // Add.
1161   void add(const Register& rd,
1162            const Register& rn,
1163            const Operand& operand);
1164 
1165   // Add and update status flags.
1166   void adds(const Register& rd,
1167             const Register& rn,
1168             const Operand& operand);
1169 
1170   // Compare negative.
1171   void cmn(const Register& rn, const Operand& operand);
1172 
1173   // Subtract.
1174   void sub(const Register& rd,
1175            const Register& rn,
1176            const Operand& operand);
1177 
1178   // Subtract and update status flags.
1179   void subs(const Register& rd,
1180             const Register& rn,
1181             const Operand& operand);
1182 
1183   // Compare.
1184   void cmp(const Register& rn, const Operand& operand);
1185 
1186   // Negate.
1187   void neg(const Register& rd,
1188            const Operand& operand);
1189 
1190   // Negate and update status flags.
1191   void negs(const Register& rd,
1192             const Operand& operand);
1193 
1194   // Add with carry bit.
1195   void adc(const Register& rd,
1196            const Register& rn,
1197            const Operand& operand);
1198 
1199   // Add with carry bit and update status flags.
1200   void adcs(const Register& rd,
1201             const Register& rn,
1202             const Operand& operand);
1203 
1204   // Subtract with carry bit.
1205   void sbc(const Register& rd,
1206            const Register& rn,
1207            const Operand& operand);
1208 
1209   // Subtract with carry bit and update status flags.
1210   void sbcs(const Register& rd,
1211             const Register& rn,
1212             const Operand& operand);
1213 
1214   // Negate with carry bit.
1215   void ngc(const Register& rd,
1216            const Operand& operand);
1217 
1218   // Negate with carry bit and update status flags.
1219   void ngcs(const Register& rd,
1220             const Operand& operand);
1221 
1222   // Logical instructions.
1223   // Bitwise and (A & B).
1224   void and_(const Register& rd,
1225             const Register& rn,
1226             const Operand& operand);
1227 
1228   // Bitwise and (A & B) and update status flags.
1229   void ands(const Register& rd,
1230             const Register& rn,
1231             const Operand& operand);
1232 
1233   // Bit test, and set flags.
1234   void tst(const Register& rn, const Operand& operand);
1235 
1236   // Bit clear (A & ~B).
1237   void bic(const Register& rd,
1238            const Register& rn,
1239            const Operand& operand);
1240 
1241   // Bit clear (A & ~B) and update status flags.
1242   void bics(const Register& rd,
1243             const Register& rn,
1244             const Operand& operand);
1245 
1246   // Bitwise and.
1247   void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1248 
1249   // Bit clear immediate.
1250   void bic(const VRegister& vd, const int imm8, const int left_shift = 0);
1251 
1252   // Bit clear.
1253   void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1254 
1255   // Bitwise insert if false.
1256   void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1257 
1258   // Bitwise insert if true.
1259   void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1260 
1261   // Bitwise select.
1262   void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1263 
1264   // Polynomial multiply.
1265   void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1266 
1267   // Vector move immediate.
1268   void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL,
1269             const int shift_amount = 0);
1270 
1271   // Bitwise not.
1272   void mvn(const VRegister& vd, const VRegister& vn);
1273 
1274   // Vector move inverted immediate.
1275   void mvni(const VRegister& vd, const int imm8, Shift shift = LSL,
1276             const int shift_amount = 0);
1277 
1278   // Signed saturating accumulate of unsigned value.
1279   void suqadd(const VRegister& vd, const VRegister& vn);
1280 
1281   // Unsigned saturating accumulate of signed value.
1282   void usqadd(const VRegister& vd, const VRegister& vn);
1283 
1284   // Absolute value.
1285   void abs(const VRegister& vd, const VRegister& vn);
1286 
1287   // Signed saturating absolute value.
1288   void sqabs(const VRegister& vd, const VRegister& vn);
1289 
1290   // Negate.
1291   void neg(const VRegister& vd, const VRegister& vn);
1292 
1293   // Signed saturating negate.
1294   void sqneg(const VRegister& vd, const VRegister& vn);
1295 
1296   // Bitwise not.
1297   void not_(const VRegister& vd, const VRegister& vn);
1298 
1299   // Extract narrow.
1300   void xtn(const VRegister& vd, const VRegister& vn);
1301 
1302   // Extract narrow (second part).
1303   void xtn2(const VRegister& vd, const VRegister& vn);
1304 
1305   // Signed saturating extract narrow.
1306   void sqxtn(const VRegister& vd, const VRegister& vn);
1307 
1308   // Signed saturating extract narrow (second part).
1309   void sqxtn2(const VRegister& vd, const VRegister& vn);
1310 
1311   // Unsigned saturating extract narrow.
1312   void uqxtn(const VRegister& vd, const VRegister& vn);
1313 
1314   // Unsigned saturating extract narrow (second part).
1315   void uqxtn2(const VRegister& vd, const VRegister& vn);
1316 
1317   // Signed saturating extract unsigned narrow.
1318   void sqxtun(const VRegister& vd, const VRegister& vn);
1319 
1320   // Signed saturating extract unsigned narrow (second part).
1321   void sqxtun2(const VRegister& vd, const VRegister& vn);
1322 
1323   // Move register to register.
1324   void mov(const VRegister& vd, const VRegister& vn);
1325 
  // Bitwise or-not.
1327   void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1328 
1329   // Bitwise exclusive or.
1330   void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1331 
1332   // Bitwise or (A | B).
1333   void orr(const Register& rd, const Register& rn, const Operand& operand);
1334 
1335   // Bitwise or.
1336   void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1337 
1338   // Bitwise or immediate.
1339   void orr(const VRegister& vd, const int imm8, const int left_shift = 0);
1340 
  // Bitwise or-not (A | ~B).
1342   void orn(const Register& rd, const Register& rn, const Operand& operand);
1343 
1344   // Bitwise eor/xor (A ^ B).
1345   void eor(const Register& rd, const Register& rn, const Operand& operand);
1346 
  // Bitwise eon/xnor (A ^ ~B).
1348   void eon(const Register& rd, const Register& rn, const Operand& operand);
1349 
1350   // Logical shift left variable.
1351   void lslv(const Register& rd, const Register& rn, const Register& rm);
1352 
1353   // Logical shift right variable.
1354   void lsrv(const Register& rd, const Register& rn, const Register& rm);
1355 
1356   // Arithmetic shift right variable.
1357   void asrv(const Register& rd, const Register& rn, const Register& rm);
1358 
1359   // Rotate right variable.
1360   void rorv(const Register& rd, const Register& rn, const Register& rm);
1361 
1362   // Bitfield instructions.
1363   // Bitfield move.
1364   void bfm(const Register& rd, const Register& rn, int immr, int imms);
1365 
1366   // Signed bitfield move.
1367   void sbfm(const Register& rd, const Register& rn, int immr, int imms);
1368 
1369   // Unsigned bitfield move.
1370   void ubfm(const Register& rd, const Register& rn, int immr, int imms);
1371 
1372   // Bfm aliases.
1373   // Bitfield insert.
bfi(const Register & rd,const Register & rn,int lsb,int width)1374   void bfi(const Register& rd, const Register& rn, int lsb, int width) {
1375     DCHECK_GE(width, 1);
1376     DCHECK(lsb + width <= rn.SizeInBits());
1377     bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
1378   }
1379 
1380   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,int lsb,int width)1381   void bfxil(const Register& rd, const Register& rn, int lsb, int width) {
1382     DCHECK_GE(width, 1);
1383     DCHECK(lsb + width <= rn.SizeInBits());
1384     bfm(rd, rn, lsb, lsb + width - 1);
1385   }
1386 
1387   // Sbfm aliases.
1388   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,int shift)1389   void asr(const Register& rd, const Register& rn, int shift) {
1390     DCHECK(shift < rd.SizeInBits());
1391     sbfm(rd, rn, shift, rd.SizeInBits() - 1);
1392   }
1393 
1394   // Signed bitfield insert in zero.
sbfiz(const Register & rd,const Register & rn,int lsb,int width)1395   void sbfiz(const Register& rd, const Register& rn, int lsb, int width) {
1396     DCHECK_GE(width, 1);
1397     DCHECK(lsb + width <= rn.SizeInBits());
1398     sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
1399   }
1400 
1401   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,int lsb,int width)1402   void sbfx(const Register& rd, const Register& rn, int lsb, int width) {
1403     DCHECK_GE(width, 1);
1404     DCHECK(lsb + width <= rn.SizeInBits());
1405     sbfm(rd, rn, lsb, lsb + width - 1);
1406   }
1407 
1408   // Signed extend byte.
  void sxtb(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 7);  // Sign-extend bits <7:0> of rn into rd.
  }
1412 
1413   // Signed extend halfword.
  void sxth(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 15);  // Sign-extend bits <15:0> of rn into rd.
  }
1417 
1418   // Signed extend word.
  void sxtw(const Register& rd, const Register& rn) {
    sbfm(rd, rn, 0, 31);  // Sign-extend bits <31:0> of rn into rd.
  }
1422 
1423   // Ubfm aliases.
1424   // Logical shift left.
lsl(const Register & rd,const Register & rn,int shift)1425   void lsl(const Register& rd, const Register& rn, int shift) {
1426     int reg_size = rd.SizeInBits();
1427     DCHECK(shift < reg_size);
1428     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
1429   }
1430 
1431   // Logical shift right.
lsr(const Register & rd,const Register & rn,int shift)1432   void lsr(const Register& rd, const Register& rn, int shift) {
1433     DCHECK(shift < rd.SizeInBits());
1434     ubfm(rd, rn, shift, rd.SizeInBits() - 1);
1435   }
1436 
1437   // Unsigned bitfield insert in zero.
ubfiz(const Register & rd,const Register & rn,int lsb,int width)1438   void ubfiz(const Register& rd, const Register& rn, int lsb, int width) {
1439     DCHECK_GE(width, 1);
1440     DCHECK(lsb + width <= rn.SizeInBits());
1441     ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1);
1442   }
1443 
1444   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,int lsb,int width)1445   void ubfx(const Register& rd, const Register& rn, int lsb, int width) {
1446     DCHECK_GE(width, 1);
1447     DCHECK(lsb + width <= rn.SizeInBits());
1448     ubfm(rd, rn, lsb, lsb + width - 1);
1449   }
1450 
1451   // Unsigned extend byte.
  void uxtb(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 7);  // Zero-extend bits <7:0> of rn into rd.
  }
1455 
1456   // Unsigned extend halfword.
  void uxth(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 15);  // Zero-extend bits <15:0> of rn into rd.
  }
1460 
1461   // Unsigned extend word.
  void uxtw(const Register& rd, const Register& rn) {
    ubfm(rd, rn, 0, 31);  // Zero-extend bits <31:0> of rn into rd.
  }
1465 
1466   // Extract.
1467   void extr(const Register& rd, const Register& rn, const Register& rm,
1468             int lsb);
1469 
1470   // Conditional select: rd = cond ? rn : rm.
1471   void csel(const Register& rd,
1472             const Register& rn,
1473             const Register& rm,
1474             Condition cond);
1475 
1476   // Conditional select increment: rd = cond ? rn : rm + 1.
1477   void csinc(const Register& rd,
1478              const Register& rn,
1479              const Register& rm,
1480              Condition cond);
1481 
1482   // Conditional select inversion: rd = cond ? rn : ~rm.
1483   void csinv(const Register& rd,
1484              const Register& rn,
1485              const Register& rm,
1486              Condition cond);
1487 
1488   // Conditional select negation: rd = cond ? rn : -rm.
1489   void csneg(const Register& rd,
1490              const Register& rn,
1491              const Register& rm,
1492              Condition cond);
1493 
1494   // Conditional set: rd = cond ? 1 : 0.
1495   void cset(const Register& rd, Condition cond);
1496 
1497   // Conditional set minus: rd = cond ? -1 : 0.
1498   void csetm(const Register& rd, Condition cond);
1499 
1500   // Conditional increment: rd = cond ? rn + 1 : rn.
1501   void cinc(const Register& rd, const Register& rn, Condition cond);
1502 
1503   // Conditional invert: rd = cond ? ~rn : rn.
1504   void cinv(const Register& rd, const Register& rn, Condition cond);
1505 
1506   // Conditional negate: rd = cond ? -rn : rn.
1507   void cneg(const Register& rd, const Register& rn, Condition cond);
1508 
1509   // Extr aliases.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    // Rotate right: extr with the same register as both sources.
    extr(rd, rs, rs, shift);
  }
1513 
1514   // Conditional comparison.
1515   // Conditional compare negative.
1516   void ccmn(const Register& rn,
1517             const Operand& operand,
1518             StatusFlags nzcv,
1519             Condition cond);
1520 
1521   // Conditional compare.
1522   void ccmp(const Register& rn,
1523             const Operand& operand,
1524             StatusFlags nzcv,
1525             Condition cond);
1526 
1527   // Multiplication.
1528   // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply.
1529   void mul(const Register& rd, const Register& rn, const Register& rm);
1530 
1531   // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate.
1532   void madd(const Register& rd,
1533             const Register& rn,
1534             const Register& rm,
1535             const Register& ra);
1536 
1537   // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply.
1538   void mneg(const Register& rd, const Register& rn, const Register& rm);
1539 
1540   // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract.
1541   void msub(const Register& rd,
1542             const Register& rn,
1543             const Register& rm,
1544             const Register& ra);
1545 
  // Signed 32 x 32 -> 64-bit multiply.
1547   void smull(const Register& rd, const Register& rn, const Register& rm);
1548 
1549   // Xd = bits<127:64> of Xn * Xm.
1550   void smulh(const Register& rd, const Register& rn, const Register& rm);
1551 
1552   // Signed 32 x 32 -> 64-bit multiply and accumulate.
1553   void smaddl(const Register& rd,
1554               const Register& rn,
1555               const Register& rm,
1556               const Register& ra);
1557 
1558   // Unsigned 32 x 32 -> 64-bit multiply and accumulate.
1559   void umaddl(const Register& rd,
1560               const Register& rn,
1561               const Register& rm,
1562               const Register& ra);
1563 
1564   // Signed 32 x 32 -> 64-bit multiply and subtract.
1565   void smsubl(const Register& rd,
1566               const Register& rn,
1567               const Register& rm,
1568               const Register& ra);
1569 
1570   // Unsigned 32 x 32 -> 64-bit multiply and subtract.
1571   void umsubl(const Register& rd,
1572               const Register& rn,
1573               const Register& rm,
1574               const Register& ra);
1575 
1576   // Signed integer divide.
1577   void sdiv(const Register& rd, const Register& rn, const Register& rm);
1578 
1579   // Unsigned integer divide.
1580   void udiv(const Register& rd, const Register& rn, const Register& rm);
1581 
1582   // Bit count, bit reverse and endian reverse.
1583   void rbit(const Register& rd, const Register& rn);
1584   void rev16(const Register& rd, const Register& rn);
1585   void rev32(const Register& rd, const Register& rn);
1586   void rev(const Register& rd, const Register& rn);
1587   void clz(const Register& rd, const Register& rn);
1588   void cls(const Register& rd, const Register& rn);
1589 
  // Memory instructions.

  // Load integer or FP register.
  void ldr(const CPURegister& rt, const MemOperand& src);

  // Store integer or FP register.
  void str(const CPURegister& rt, const MemOperand& dst);

  // Load word with sign extension.
  void ldrsw(const Register& rt, const MemOperand& src);

  // Load byte.
  void ldrb(const Register& rt, const MemOperand& src);

  // Store byte.
  void strb(const Register& rt, const MemOperand& dst);

  // Load byte with sign extension.
  void ldrsb(const Register& rt, const MemOperand& src);

  // Load half-word.
  void ldrh(const Register& rt, const MemOperand& src);

  // Store half-word.
  void strh(const Register& rt, const MemOperand& dst);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt, const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt, const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);

  // Load literal to register from a pc relative address.
  void ldr_pcrel(const CPURegister& rt, int imm19);

  // Load literal to register.
  void ldr(const CPURegister& rt, const Immediate& imm);
  void ldr(const CPURegister& rt, const Operand& operand);

  // Load-acquire word.
  void ldar(const Register& rt, const Register& rn);

  // Load-acquire exclusive word.
  void ldaxr(const Register& rt, const Register& rn);

  // Store-release word.
  void stlr(const Register& rt, const Register& rn);

  // Store-release exclusive word.
  void stlxr(const Register& rs, const Register& rt, const Register& rn);

  // Load-acquire byte.
  void ldarb(const Register& rt, const Register& rn);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const Register& rn);

  // Store-release byte.
  void stlrb(const Register& rt, const Register& rn);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const Register& rn);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const Register& rn);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const Register& rn);

  // Store-release half-word.
  void stlrh(const Register& rt, const Register& rn);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const Register& rn);
1672 
  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, e.g. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.

  // Move and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
1687 
  // Move with non-zero.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
1692 
  // Move with zero.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
1697 
  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move NOT(operand) to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& rt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Conditional speculation barrier.
  void csdb();
1732 
  // Alias for system instructions.
  void nop() { hint(NOP); }
1735 
  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    DEBUG_BREAK_NOP,
    INTERRUPT_CODE_NOP,
    ADR_FAR_NOP,
    // Range sentinels used by the DCHECK in nop(NopMarkerTypes).
    FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
    LAST_NOP_MARKER = ADR_FAR_NOP
  };
1745 
  // Emit a marker nop: a self-move "mov xN, xN" where the register code N is
  // the marker type, so the marker can be recognized in generated code.
  void nop(NopMarkerTypes n) {
    DCHECK((FIRST_NOP_MARKER <= n) && (n <= LAST_NOP_MARKER));
    mov(Register::XRegFromCode(n), Register::XRegFromCode(n));
  }
1750 
  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);
1852 
  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Unsigned long multiply.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1944 
  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum pairwise.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);
2013 
  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt, const VRegister& vt2, int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           int lane, const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, int lane, const MemOperand& src);
2053 
  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2137 
  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed sat rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed sat rounded shift right unsigned narrow by immediate (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);
2209 
  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2293 
2294   // FP instructions.
2295   // Move immediate to FP register.
2296   void fmov(const VRegister& fd, double imm);
2297   void fmov(const VRegister& fd, float imm);
2298 
2299   // Move FP register to register.
2300   void fmov(const Register& rd, const VRegister& fn);
2301 
2302   // Move register to FP register.
2303   void fmov(const VRegister& fd, const Register& rn);
2304 
2305   // Move FP register to FP register.
2306   void fmov(const VRegister& fd, const VRegister& fn);
2307 
2308   // Move 64-bit register to top half of 128-bit FP register.
2309   void fmov(const VRegister& vd, int index, const Register& rn);
2310 
2311   // Move top half of 128-bit FP register to 64-bit register.
2312   void fmov(const Register& rd, const VRegister& vn, int index);
2313 
2314   // FP add.
2315   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2316 
2317   // FP subtract.
2318   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2319 
2320   // FP multiply.
2321   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2322 
2323   // FP compare equal to zero.
2324   void fcmeq(const VRegister& vd, const VRegister& vn, double imm);
2325 
2326   // FP greater than zero.
2327   void fcmgt(const VRegister& vd, const VRegister& vn, double imm);
2328 
2329   // FP greater than or equal to zero.
2330   void fcmge(const VRegister& vd, const VRegister& vn, double imm);
2331 
2332   // FP less than or equal to zero.
2333   void fcmle(const VRegister& vd, const VRegister& vn, double imm);
2334 
  // FP less than zero.
2336   void fcmlt(const VRegister& vd, const VRegister& vn, double imm);
2337 
2338   // FP absolute difference.
2339   void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2340 
2341   // FP pairwise add vector.
2342   void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2343 
2344   // FP pairwise add scalar.
2345   void faddp(const VRegister& vd, const VRegister& vn);
2346 
2347   // FP pairwise maximum scalar.
2348   void fmaxp(const VRegister& vd, const VRegister& vn);
2349 
2350   // FP pairwise maximum number scalar.
2351   void fmaxnmp(const VRegister& vd, const VRegister& vn);
2352 
2353   // FP pairwise minimum number scalar.
2354   void fminnmp(const VRegister& vd, const VRegister& vn);
2355 
2356   // FP vector multiply accumulate.
2357   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2358 
2359   // FP vector multiply subtract.
2360   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2361 
2362   // FP vector multiply extended.
2363   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2364 
2365   // FP absolute greater than or equal.
2366   void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2367 
2368   // FP absolute greater than.
2369   void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2370 
2371   // FP multiply by element.
2372   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2373             int vm_index);
2374 
2375   // FP fused multiply-add to accumulator by element.
2376   void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2377             int vm_index);
2378 
2379   // FP fused multiply-sub from accumulator by element.
2380   void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2381             int vm_index);
2382 
2383   // FP multiply extended by element.
2384   void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2385              int vm_index);
2386 
2387   // FP compare equal.
2388   void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2389 
2390   // FP greater than.
2391   void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2392 
2393   // FP greater than or equal.
2394   void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2395 
2396   // FP pairwise maximum vector.
2397   void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2398 
2399   // FP pairwise minimum vector.
2400   void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2401 
2402   // FP pairwise minimum scalar.
2403   void fminp(const VRegister& vd, const VRegister& vn);
2404 
2405   // FP pairwise maximum number vector.
2406   void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2407 
2408   // FP pairwise minimum number vector.
2409   void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2410 
2411   // FP fused multiply-add.
2412   void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2413              const VRegister& va);
2414 
2415   // FP fused multiply-subtract.
2416   void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2417              const VRegister& va);
2418 
2419   // FP fused multiply-add and negate.
2420   void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2421               const VRegister& va);
2422 
2423   // FP fused multiply-subtract and negate.
2424   void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2425               const VRegister& va);
2426 
2427   // FP multiply-negate scalar.
2428   void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2429 
2430   // FP reciprocal exponent scalar.
2431   void frecpx(const VRegister& vd, const VRegister& vn);
2432 
2433   // FP divide.
2434   void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2435 
2436   // FP maximum.
2437   void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2438 
2439   // FP minimum.
2440   void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2441 
  // FP maximum number.
2443   void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2444 
  // FP minimum number.
2446   void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2447 
2448   // FP absolute.
2449   void fabs(const VRegister& vd, const VRegister& vn);
2450 
2451   // FP negate.
2452   void fneg(const VRegister& vd, const VRegister& vn);
2453 
2454   // FP square root.
2455   void fsqrt(const VRegister& vd, const VRegister& vn);
2456 
2457   // FP round to integer nearest with ties to away.
2458   void frinta(const VRegister& vd, const VRegister& vn);
2459 
2460   // FP round to integer, implicit rounding.
2461   void frinti(const VRegister& vd, const VRegister& vn);
2462 
2463   // FP round to integer toward minus infinity.
2464   void frintm(const VRegister& vd, const VRegister& vn);
2465 
2466   // FP round to integer nearest with ties to even.
2467   void frintn(const VRegister& vd, const VRegister& vn);
2468 
2469   // FP round to integer towards plus infinity.
2470   void frintp(const VRegister& vd, const VRegister& vn);
2471 
2472   // FP round to integer, exact, implicit rounding.
2473   void frintx(const VRegister& vd, const VRegister& vn);
2474 
2475   // FP round to integer towards zero.
2476   void frintz(const VRegister& vd, const VRegister& vn);
2477 
2478   // FP compare registers.
2479   void fcmp(const VRegister& vn, const VRegister& vm);
2480 
2481   // FP compare immediate.
2482   void fcmp(const VRegister& vn, double value);
2483 
2484   // FP conditional compare.
2485   void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv,
2486              Condition cond);
2487 
2488   // FP conditional select.
2489   void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2490              Condition cond);
2491 
2492   // Common FP Convert functions.
2493   void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
2494   void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
2495 
2496   // FP convert between precisions.
2497   void fcvt(const VRegister& vd, const VRegister& vn);
2498 
2499   // FP convert to higher precision.
2500   void fcvtl(const VRegister& vd, const VRegister& vn);
2501 
2502   // FP convert to higher precision (second part).
2503   void fcvtl2(const VRegister& vd, const VRegister& vn);
2504 
2505   // FP convert to lower precision.
2506   void fcvtn(const VRegister& vd, const VRegister& vn);
2507 
  // FP convert to lower precision (second part).
2509   void fcvtn2(const VRegister& vd, const VRegister& vn);
2510 
2511   // FP convert to lower precision, rounding to odd.
2512   void fcvtxn(const VRegister& vd, const VRegister& vn);
2513 
2514   // FP convert to lower precision, rounding to odd (second part).
2515   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2516 
2517   // FP convert to signed integer, nearest with ties to away.
2518   void fcvtas(const Register& rd, const VRegister& vn);
2519 
2520   // FP convert to unsigned integer, nearest with ties to away.
2521   void fcvtau(const Register& rd, const VRegister& vn);
2522 
2523   // FP convert to signed integer, nearest with ties to away.
2524   void fcvtas(const VRegister& vd, const VRegister& vn);
2525 
2526   // FP convert to unsigned integer, nearest with ties to away.
2527   void fcvtau(const VRegister& vd, const VRegister& vn);
2528 
2529   // FP convert to signed integer, round towards -infinity.
2530   void fcvtms(const Register& rd, const VRegister& vn);
2531 
2532   // FP convert to unsigned integer, round towards -infinity.
2533   void fcvtmu(const Register& rd, const VRegister& vn);
2534 
2535   // FP convert to signed integer, round towards -infinity.
2536   void fcvtms(const VRegister& vd, const VRegister& vn);
2537 
2538   // FP convert to unsigned integer, round towards -infinity.
2539   void fcvtmu(const VRegister& vd, const VRegister& vn);
2540 
2541   // FP convert to signed integer, nearest with ties to even.
2542   void fcvtns(const Register& rd, const VRegister& vn);
2543 
2544   // FP convert to unsigned integer, nearest with ties to even.
2545   void fcvtnu(const Register& rd, const VRegister& vn);
2546 
2547   // FP convert to signed integer, nearest with ties to even.
2548   void fcvtns(const VRegister& rd, const VRegister& vn);
2549 
2550   // FP convert to unsigned integer, nearest with ties to even.
2551   void fcvtnu(const VRegister& rd, const VRegister& vn);
2552 
2553   // FP convert to signed integer or fixed-point, round towards zero.
2554   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2555 
2556   // FP convert to unsigned integer or fixed-point, round towards zero.
2557   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2558 
2559   // FP convert to signed integer or fixed-point, round towards zero.
2560   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2561 
2562   // FP convert to unsigned integer or fixed-point, round towards zero.
2563   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2564 
2565   // FP convert to signed integer, round towards +infinity.
2566   void fcvtps(const Register& rd, const VRegister& vn);
2567 
2568   // FP convert to unsigned integer, round towards +infinity.
2569   void fcvtpu(const Register& rd, const VRegister& vn);
2570 
2571   // FP convert to signed integer, round towards +infinity.
2572   void fcvtps(const VRegister& vd, const VRegister& vn);
2573 
2574   // FP convert to unsigned integer, round towards +infinity.
2575   void fcvtpu(const VRegister& vd, const VRegister& vn);
2576 
2577   // Convert signed integer or fixed point to FP.
2578   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2579 
2580   // Convert unsigned integer or fixed point to FP.
2581   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2582 
2583   // Convert signed integer or fixed-point to FP.
2584   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2585 
2586   // Convert unsigned integer or fixed-point to FP.
2587   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2588 
2589   // Extract vector from pair of vectors.
2590   void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm,
2591            int index);
2592 
2593   // Duplicate vector element to vector or scalar.
2594   void dup(const VRegister& vd, const VRegister& vn, int vn_index);
2595 
2596   // Duplicate general-purpose register to vector.
2597   void dup(const VRegister& vd, const Register& rn);
2598 
2599   // Insert vector element from general-purpose register.
2600   void ins(const VRegister& vd, int vd_index, const Register& rn);
2601 
2602   // Move general-purpose register to a vector element.
2603   void mov(const VRegister& vd, int vd_index, const Register& rn);
2604 
2605   // Unsigned move vector element to general-purpose register.
2606   void umov(const Register& rd, const VRegister& vn, int vn_index);
2607 
2608   // Move vector element to general-purpose register.
2609   void mov(const Register& rd, const VRegister& vn, int vn_index);
2610 
2611   // Move vector element to scalar.
2612   void mov(const VRegister& vd, const VRegister& vn, int vn_index);
2613 
2614   // Insert vector element from another vector element.
2615   void ins(const VRegister& vd, int vd_index, const VRegister& vn,
2616            int vn_index);
2617 
2618   // Move vector element to another vector element.
2619   void mov(const VRegister& vd, int vd_index, const VRegister& vn,
2620            int vn_index);
2621 
2622   // Signed move vector element to general-purpose register.
2623   void smov(const Register& rd, const VRegister& vn, int vn_index);
2624 
2625   // One-element structure load to one register.
2626   void ld1(const VRegister& vt, const MemOperand& src);
2627 
2628   // One-element structure load to two registers.
2629   void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2630 
2631   // One-element structure load to three registers.
2632   void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2633            const MemOperand& src);
2634 
2635   // One-element structure load to four registers.
2636   void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2637            const VRegister& vt4, const MemOperand& src);
2638 
2639   // One-element single structure load to one lane.
2640   void ld1(const VRegister& vt, int lane, const MemOperand& src);
2641 
2642   // One-element single structure load to all lanes.
2643   void ld1r(const VRegister& vt, const MemOperand& src);
2644 
2645   // Two-element structure load.
2646   void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2647 
2648   // Two-element single structure load to one lane.
2649   void ld2(const VRegister& vt, const VRegister& vt2, int lane,
2650            const MemOperand& src);
2651 
2652   // Two-element single structure load to all lanes.
2653   void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src);
2654 
2655   // Three-element structure load.
2656   void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2657            const MemOperand& src);
2658 
2659   // Three-element single structure load to one lane.
2660   void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2661            int lane, const MemOperand& src);
2662 
2663   // Three-element single structure load to all lanes.
2664   void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2665             const MemOperand& src);
2666 
2667   // Four-element structure load.
2668   void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2669            const VRegister& vt4, const MemOperand& src);
2670 
2671   // Four-element single structure load to one lane.
2672   void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2673            const VRegister& vt4, int lane, const MemOperand& src);
2674 
2675   // Four-element single structure load to all lanes.
2676   void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
2677             const VRegister& vt4, const MemOperand& src);
2678 
2679   // Count leading sign bits.
2680   void cls(const VRegister& vd, const VRegister& vn);
2681 
2682   // Count leading zero bits (vector).
2683   void clz(const VRegister& vd, const VRegister& vn);
2684 
2685   // Population count per byte.
2686   void cnt(const VRegister& vd, const VRegister& vn);
2687 
2688   // Reverse bit order.
2689   void rbit(const VRegister& vd, const VRegister& vn);
2690 
2691   // Reverse elements in 16-bit halfwords.
2692   void rev16(const VRegister& vd, const VRegister& vn);
2693 
2694   // Reverse elements in 32-bit words.
2695   void rev32(const VRegister& vd, const VRegister& vn);
2696 
2697   // Reverse elements in 64-bit doublewords.
2698   void rev64(const VRegister& vd, const VRegister& vn);
2699 
2700   // Unsigned reciprocal square root estimate.
2701   void ursqrte(const VRegister& vd, const VRegister& vn);
2702 
2703   // Unsigned reciprocal estimate.
2704   void urecpe(const VRegister& vd, const VRegister& vn);
2705 
2706   // Signed pairwise long add and accumulate.
2707   void sadalp(const VRegister& vd, const VRegister& vn);
2708 
2709   // Signed pairwise long add.
2710   void saddlp(const VRegister& vd, const VRegister& vn);
2711 
2712   // Unsigned pairwise long add.
2713   void uaddlp(const VRegister& vd, const VRegister& vn);
2714 
2715   // Unsigned pairwise long add and accumulate.
2716   void uadalp(const VRegister& vd, const VRegister& vn);
2717 
2718   // Shift left by immediate.
2719   void shl(const VRegister& vd, const VRegister& vn, int shift);
2720 
2721   // Signed saturating shift left by immediate.
2722   void sqshl(const VRegister& vd, const VRegister& vn, int shift);
2723 
2724   // Signed saturating shift left unsigned by immediate.
2725   void sqshlu(const VRegister& vd, const VRegister& vn, int shift);
2726 
2727   // Unsigned saturating shift left by immediate.
2728   void uqshl(const VRegister& vd, const VRegister& vn, int shift);
2729 
2730   // Signed shift left long by immediate.
2731   void sshll(const VRegister& vd, const VRegister& vn, int shift);
2732 
2733   // Signed shift left long by immediate (second part).
2734   void sshll2(const VRegister& vd, const VRegister& vn, int shift);
2735 
2736   // Signed extend long.
2737   void sxtl(const VRegister& vd, const VRegister& vn);
2738 
2739   // Signed extend long (second part).
2740   void sxtl2(const VRegister& vd, const VRegister& vn);
2741 
2742   // Unsigned shift left long by immediate.
2743   void ushll(const VRegister& vd, const VRegister& vn, int shift);
2744 
2745   // Unsigned shift left long by immediate (second part).
2746   void ushll2(const VRegister& vd, const VRegister& vn, int shift);
2747 
2748   // Shift left long by element size.
2749   void shll(const VRegister& vd, const VRegister& vn, int shift);
2750 
2751   // Shift left long by element size (second part).
2752   void shll2(const VRegister& vd, const VRegister& vn, int shift);
2753 
2754   // Unsigned extend long.
2755   void uxtl(const VRegister& vd, const VRegister& vn);
2756 
2757   // Unsigned extend long (second part).
2758   void uxtl2(const VRegister& vd, const VRegister& vn);
2759 
2760   // Signed rounding halving add.
2761   void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2762 
2763   // Unsigned halving sub.
2764   void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2765 
2766   // Signed halving sub.
2767   void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2768 
2769   // Unsigned saturating add.
2770   void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2771 
2772   // Signed saturating add.
2773   void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2774 
2775   // Unsigned saturating subtract.
2776   void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2777 
2778   // Signed saturating subtract.
2779   void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2780 
2781   // Add pairwise.
2782   void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2783 
2784   // Add pair of elements scalar.
2785   void addp(const VRegister& vd, const VRegister& vn);
2786 
2787   // Multiply-add to accumulator.
2788   void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2789 
2790   // Multiply-subtract to accumulator.
2791   void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2792 
2793   // Multiply.
2794   void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2795 
2796   // Table lookup from one register.
2797   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2798 
2799   // Table lookup from two registers.
2800   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2801            const VRegister& vm);
2802 
2803   // Table lookup from three registers.
2804   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2805            const VRegister& vn3, const VRegister& vm);
2806 
2807   // Table lookup from four registers.
2808   void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2809            const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2810 
2811   // Table lookup extension from one register.
2812   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2813 
2814   // Table lookup extension from two registers.
2815   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2816            const VRegister& vm);
2817 
2818   // Table lookup extension from three registers.
2819   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2820            const VRegister& vn3, const VRegister& vm);
2821 
2822   // Table lookup extension from four registers.
2823   void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2824            const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2825 
  // Instruction functions used only for test, debug, and patching.
  // Emit a raw instruction (one 32-bit instruction word) in the instruction
  // stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 8 bits of data in the instruction stream.
  void dc8(uint8_t data) { EmitData(&data, sizeof(data)); }

  // Emit 32 bits of data in the instruction stream.
  void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }

  // Emit 64 bits of data in the instruction stream.
  void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
2838 
2839   // Emit an address in the instruction stream.
2840   void dcptr(Label* label);
2841 
  // Copy a string into the instruction stream, including the terminating
  // null character. The instruction pointer (pc_) is then aligned correctly
  // for subsequent instructions.
2845   void EmitStringData(const char* string);
2846 
2847   // Pseudo-instructions ------------------------------------------------------
2848 
2849   // Parameters are described in arm64/instructions-arm64.h.
2850   void debug(const char* message, uint32_t code, Instr params = BREAK);
2851 
  // Required by V8.
  // Aliases of the dcN() data-emission helpers above.
  void dd(uint32_t data) { dc32(data); }
  void db(uint8_t data) { dc8(data); }
  void dq(uint64_t data) { dc64(data); }
  // Pointer-sized data is emitted as 64 bits (arm64 pointers are 64-bit).
  void dp(uintptr_t data) { dc64(data); }
2857 
  // Code generation helpers --------------------------------------------------

  // Returns true if the constant pool holds no pending entries.
  bool IsConstPoolEmpty() const { return constpool_.IsEmpty(); }

  // Returns the current emission position as an Instruction*.
  Instruction* pc() const { return Instruction::Cast(pc_); }

  // Returns the instruction located at the given byte offset from the start
  // of the code buffer.
  Instruction* InstructionAt(ptrdiff_t offset) const {
    return reinterpret_cast<Instruction*>(buffer_ + offset);
  }

  // Returns the byte offset of the given instruction from the start of the
  // code buffer. Inverse of InstructionAt().
  ptrdiff_t InstructionOffset(Instruction* instr) const {
    return reinterpret_cast<byte*>(instr) - buffer_;
  }
2871 
GetSpecialRegisterName(int code)2872   static const char* GetSpecialRegisterName(int code) {
2873     return (code == kSPRegInternalCode) ? "sp" : "UNKNOWN";
2874   }
2875 
  // Register encoding.
  // Each RX() helper shifts a register code into the instruction field named
  // X. All of them reject the stack pointer (DCHECK against
  // kSPRegInternalCode); use RdSP()/RnSP() where sp must be encodable.
  static Instr Rd(CPURegister rd) {
    DCHECK_NE(rd.code(), kSPRegInternalCode);
    return rd.code() << Rd_offset;
  }

  static Instr Rn(CPURegister rn) {
    DCHECK_NE(rn.code(), kSPRegInternalCode);
    return rn.code() << Rn_offset;
  }

  static Instr Rm(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    return rm.code() << Rm_offset;
  }

  // Like Rm(), but additionally rejects the zero register.
  static Instr RmNot31(CPURegister rm) {
    DCHECK_NE(rm.code(), kSPRegInternalCode);
    DCHECK(!rm.IsZero());
    return Rm(rm);
  }

  static Instr Ra(CPURegister ra) {
    DCHECK_NE(ra.code(), kSPRegInternalCode);
    return ra.code() << Ra_offset;
  }

  static Instr Rt(CPURegister rt) {
    DCHECK_NE(rt.code(), kSPRegInternalCode);
    return rt.code() << Rt_offset;
  }

  static Instr Rt2(CPURegister rt2) {
    DCHECK_NE(rt2.code(), kSPRegInternalCode);
    return rt2.code() << Rt2_offset;
  }

  static Instr Rs(CPURegister rs) {
    DCHECK_NE(rs.code(), kSPRegInternalCode);
    return rs.code() << Rs_offset;
  }

  // These encoding functions allow the stack pointer to be encoded, and
  // disallow the zero register. Masking with kRegCodeMask truncates the
  // internal sp code into the 5-bit register field.
  static Instr RdSP(Register rd) {
    DCHECK(!rd.IsZero());
    return (rd.code() & kRegCodeMask) << Rd_offset;
  }

  static Instr RnSP(Register rn) {
    DCHECK(!rn.IsZero());
    return (rn.code() & kRegCodeMask) << Rn_offset;
  }
2929 
2930   // Flags encoding.
2931   inline static Instr Flags(FlagsUpdate S);
2932   inline static Instr Cond(Condition cond);
2933 
2934   // PC-relative address encoding.
2935   inline static Instr ImmPCRelAddress(int imm21);
2936 
2937   // Branch encoding.
2938   inline static Instr ImmUncondBranch(int imm26);
2939   inline static Instr ImmCondBranch(int imm19);
2940   inline static Instr ImmCmpBranch(int imm19);
2941   inline static Instr ImmTestBranch(int imm14);
2942   inline static Instr ImmTestBranchBit(unsigned bit_pos);
2943 
2944   // Data Processing encoding.
2945   inline static Instr SF(Register rd);
2946   inline static Instr ImmAddSub(int imm);
2947   inline static Instr ImmS(unsigned imms, unsigned reg_size);
2948   inline static Instr ImmR(unsigned immr, unsigned reg_size);
2949   inline static Instr ImmSetBits(unsigned imms, unsigned reg_size);
2950   inline static Instr ImmRotate(unsigned immr, unsigned reg_size);
2951   inline static Instr ImmLLiteral(int imm19);
2952   inline static Instr BitN(unsigned bitn, unsigned reg_size);
2953   inline static Instr ShiftDP(Shift shift);
2954   inline static Instr ImmDPShift(unsigned amount);
2955   inline static Instr ExtendMode(Extend extend);
2956   inline static Instr ImmExtendShift(unsigned left_shift);
2957   inline static Instr ImmCondCmp(unsigned imm);
2958   inline static Instr Nzcv(StatusFlags nzcv);
2959 
2960   static bool IsImmAddSub(int64_t immediate);
2961   static bool IsImmLogical(uint64_t value,
2962                            unsigned width,
2963                            unsigned* n,
2964                            unsigned* imm_s,
2965                            unsigned* imm_r);
2966 
2967   // MemOperand offset encoding.
2968   inline static Instr ImmLSUnsigned(int imm12);
2969   inline static Instr ImmLS(int imm9);
2970   inline static Instr ImmLSPair(int imm7, unsigned size);
2971   inline static Instr ImmShiftLS(unsigned shift_amount);
2972   inline static Instr ImmException(int imm16);
2973   inline static Instr ImmSystemRegister(int imm15);
2974   inline static Instr ImmHint(int imm7);
2975   inline static Instr ImmBarrierDomain(int imm2);
2976   inline static Instr ImmBarrierType(int imm2);
2977   inline static unsigned CalcLSDataSize(LoadStoreOp op);
2978 
2979   // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)2980   static Instr VFormat(VRegister vd) {
2981     if (vd.Is64Bits()) {
2982       switch (vd.LaneCount()) {
2983         case 2:
2984           return NEON_2S;
2985         case 4:
2986           return NEON_4H;
2987         case 8:
2988           return NEON_8B;
2989         default:
2990           UNREACHABLE();
2991       }
2992     } else {
2993       DCHECK(vd.Is128Bits());
2994       switch (vd.LaneCount()) {
2995         case 2:
2996           return NEON_2D;
2997         case 4:
2998           return NEON_4S;
2999         case 8:
3000           return NEON_8H;
3001         case 16:
3002           return NEON_16B;
3003         default:
3004           UNREACHABLE();
3005       }
3006     }
3007   }
3008 
3009   // Instruction bits for vector format in floating point data processing
3010   // operations.
FPFormat(VRegister vd)3011   static Instr FPFormat(VRegister vd) {
3012     if (vd.LaneCount() == 1) {
3013       // Floating point scalar formats.
3014       DCHECK(vd.Is32Bits() || vd.Is64Bits());
3015       return vd.Is64Bits() ? FP64 : FP32;
3016     }
3017 
3018     // Two lane floating point vector formats.
3019     if (vd.LaneCount() == 2) {
3020       DCHECK(vd.Is64Bits() || vd.Is128Bits());
3021       return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
3022     }
3023 
3024     // Four lane floating point vector format.
3025     DCHECK((vd.LaneCount() == 4) && vd.Is128Bits());
3026     return NEON_FP_4S;
3027   }
3028 
  // Instruction bits for vector format in load and store operations.
  // Like VFormat(), but the load/store encoding additionally allows the
  // single-lane 1D arrangement for 64-bit registers.
  static Instr LSVFormat(VRegister vd) {
    if (vd.Is64Bits()) {
      switch (vd.LaneCount()) {
        case 1:
          return LS_NEON_1D;
        case 2:
          return LS_NEON_2S;
        case 4:
          return LS_NEON_4H;
        case 8:
          return LS_NEON_8B;
        default:
          UNREACHABLE();
      }
    } else {
      DCHECK(vd.Is128Bits());
      switch (vd.LaneCount()) {
        case 2:
          return LS_NEON_2D;
        case 4:
          return LS_NEON_4S;
        case 8:
          return LS_NEON_8H;
        case 16:
          return LS_NEON_16B;
        default:
          UNREACHABLE();
      }
    }
  }
3060 
3061   // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)3062   static Instr SFormat(VRegister vd) {
3063     DCHECK(vd.IsScalar());
3064     switch (vd.SizeInBytes()) {
3065       case 1:
3066         return NEON_B;
3067       case 2:
3068         return NEON_H;
3069       case 4:
3070         return NEON_S;
3071       case 8:
3072         return NEON_D;
3073       default:
3074         UNREACHABLE();
3075     }
3076   }
3077 
ImmNEONHLM(int index,int num_bits)3078   static Instr ImmNEONHLM(int index, int num_bits) {
3079     int h, l, m;
3080     if (num_bits == 3) {
3081       DCHECK(is_uint3(index));
3082       h = (index >> 2) & 1;
3083       l = (index >> 1) & 1;
3084       m = (index >> 0) & 1;
3085     } else if (num_bits == 2) {
3086       DCHECK(is_uint2(index));
3087       h = (index >> 1) & 1;
3088       l = (index >> 0) & 1;
3089       m = 0;
3090     } else {
3091       DCHECK(is_uint1(index) && (num_bits == 1));
3092       h = (index >> 0) & 1;
3093       l = 0;
3094       m = 0;
3095     }
3096     return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
3097   }
3098 
ImmNEONExt(int imm4)3099   static Instr ImmNEONExt(int imm4) {
3100     DCHECK(is_uint4(imm4));
3101     return imm4 << ImmNEONExt_offset;
3102   }
3103 
ImmNEON5(Instr format,int index)3104   static Instr ImmNEON5(Instr format, int index) {
3105     DCHECK(is_uint4(index));
3106     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
3107     int imm5 = (index << (s + 1)) | (1 << s);
3108     return imm5 << ImmNEON5_offset;
3109   }
3110 
ImmNEON4(Instr format,int index)3111   static Instr ImmNEON4(Instr format, int index) {
3112     DCHECK(is_uint4(index));
3113     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
3114     int imm4 = index << s;
3115     return imm4 << ImmNEON4_offset;
3116   }
3117 
ImmNEONabcdefgh(int imm8)3118   static Instr ImmNEONabcdefgh(int imm8) {
3119     DCHECK(is_uint8(imm8));
3120     Instr instr;
3121     instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
3122     instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
3123     return instr;
3124   }
3125 
NEONCmode(int cmode)3126   static Instr NEONCmode(int cmode) {
3127     DCHECK(is_uint4(cmode));
3128     return cmode << NEONCmode_offset;
3129   }
3130 
NEONModImmOp(int op)3131   static Instr NEONModImmOp(int op) {
3132     DCHECK(is_uint1(op));
3133     return op << NEONModImmOp_offset;
3134   }
3135 
  // Load/store offset encodability predicates.
  static bool IsImmLSUnscaled(int64_t offset);
  static bool IsImmLSScaled(int64_t offset, unsigned size);
  static bool IsImmLLiteral(int64_t offset);

  // Move immediates encoding.
  inline static Instr ImmMoveWide(int imm);
  inline static Instr ShiftMoveWide(int shift);

  // FP Immediates.
  static Instr ImmFP(double imm);
  static Instr ImmNEONFP(double imm);
  inline static Instr FPScale(unsigned scale);

  // FP register type.
  inline static Instr FPType(VRegister fd);
3151 
3152   // Class for scoping postponing the constant pool generation.
3153   class BlockConstPoolScope {
3154    public:
BlockConstPoolScope(Assembler * assem)3155     explicit BlockConstPoolScope(Assembler* assem) : assem_(assem) {
3156       assem_->StartBlockConstPool();
3157     }
~BlockConstPoolScope()3158     ~BlockConstPoolScope() {
3159       assem_->EndBlockConstPool();
3160     }
3161 
3162    private:
3163     Assembler* assem_;
3164 
3165     DISALLOW_IMPLICIT_CONSTRUCTORS(BlockConstPoolScope);
3166   };
3167 
  // Check whether it is time to emit a constant pool.
  void CheckConstPool(bool force_emit, bool require_jump);

  // Required by the shared Assembler interface; arm64 does not use an
  // embedded constant pool, so this must never be called.
  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }
3177 
  // Returns true if we should emit a veneer as soon as possible for a branch
  // which can at most reach the specified pc.
  bool ShouldEmitVeneer(int max_reachable_pc,
                        int margin = kVeneerDistanceMargin);
  // As above, for the unresolved branch with the closest reachable limit.
  bool ShouldEmitVeneers(int margin = kVeneerDistanceMargin) {
    return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
  }

  // The maximum code size generated for a veneer. Currently one branch
  // instruction. This is for code size checking purposes, and can be extended
  // in the future for example if we decide to add nops between the veneers.
  static constexpr int kMaxVeneerCodeSize = 1 * kInstrSize;
3190 
  // Records that a veneer pool of the given size was emitted at the given
  // buffer offset.
  void RecordVeneerPool(int location_offset, int size);
  // Emits veneers for branches that are approaching their maximum range.
  // If need_protection is true, the veneers are protected by a branch jumping
  // over the code.
  void EmitVeneers(bool force_emit, bool need_protection,
                   int margin = kVeneerDistanceMargin);
  // Emits a pool guard marking the veneer pool in the instruction stream.
  void EmitVeneersGuard() { EmitPoolGuard(); }
  // Checks whether veneers need to be emitted at this point.
  // If force_emit is set, a veneer is generated for *all* unresolved branches.
  void CheckVeneerPool(bool force_emit, bool require_jump,
                       int margin = kVeneerDistanceMargin);
3202 
3203   class BlockPoolsScope {
3204    public:
BlockPoolsScope(Assembler * assem)3205     explicit BlockPoolsScope(Assembler* assem) : assem_(assem) {
3206       assem_->StartBlockPools();
3207     }
~BlockPoolsScope()3208     ~BlockPoolsScope() {
3209       assem_->EndBlockPools();
3210     }
3211 
3212    private:
3213     Assembler* assem_;
3214 
3215     DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope);
3216   };
3217 
 protected:
  inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const;

  // Low-level emission helpers for loads and stores (scalar, pair and NEON
  // structure forms).
  void LoadStore(const CPURegister& rt,
                 const MemOperand& addr,
                 LoadStoreOp op);
  void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                     const MemOperand& addr, LoadStorePairOp op);
  void LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
                       NEONLoadStoreMultiStructOp op);
  void LoadStoreStruct1(const VRegister& vt, int reg_count,
                        const MemOperand& addr);
  void LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
                             const MemOperand& addr,
                             NEONLoadStoreSingleStructOp op);
  void LoadStoreStructSingleAllLanes(const VRegister& vt,
                                     const MemOperand& addr,
                                     NEONLoadStoreSingleStructOp op);
  void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr,
                             Instr op);

  // True if offset is encodable as a load/store pair immediate of this size.
  static bool IsImmLSPair(int64_t offset, unsigned size);
3240 
  // Emission helpers for logical (bitwise) operations.
  void Logical(const Register& rd,
               const Register& rn,
               const Operand& operand,
               LogicalOp op);
  void LogicalImmediate(const Register& rd,
                        const Register& rn,
                        unsigned n,
                        unsigned imm_s,
                        unsigned imm_r,
                        LogicalOp op);

  void ConditionalCompare(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond,
                          ConditionalCompareOp op);
  // True if immediate is encodable in a conditional compare instruction.
  static bool IsImmConditionalCompare(int64_t immediate);

  void AddSubWithCarry(const Register& rd,
                       const Register& rn,
                       const Operand& operand,
                       FlagsUpdate S,
                       AddSubWithCarryOp op);

  // Functions for emulating operands not directly supported by the instruction
  // set.
  void EmitShift(const Register& rd,
                 const Register& rn,
                 Shift shift,
                 unsigned amount);
  void EmitExtendShift(const Register& rd,
                       const Register& rn,
                       Extend extend,
                       unsigned left_shift);

  void AddSub(const Register& rd,
              const Register& rn,
              const Operand& operand,
              FlagsUpdate S,
              AddSubOp op);

  // FP immediate encodability predicates.
  static bool IsImmFP32(float imm);
  static bool IsImmFP64(double imm);

  // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
  // registers. Only simple loads are supported; sign- and zero-extension (such
  // as in LDPSW_x or LDRB_w) are not supported.
  static inline LoadStoreOp LoadOpFor(const CPURegister& rt);
  static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
                                              const CPURegister& rt2);
  static inline LoadStoreOp StoreOpFor(const CPURegister& rt);
  static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt,
                                               const CPURegister& rt2);
  static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);

  // Remove the specified branch from the unbound label link chain.
  // If available, a veneer for this label can be used for other branches in the
  // chain if the link chain cannot be fixed up without this branch.
  void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label,
                                      Instruction* label_veneer = nullptr);
3301 
 private:
  static uint32_t FPToImm8(double imm);

  // Instruction helpers.
  void MoveWide(const Register& rd,
                uint64_t imm,
                int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd,
                               const Register& rn,
                               const Operand& operand,
                               FlagsUpdate S,
                               Instr op);
  void DataProcExtendedRegister(const Register& rd,
                                const Register& rn,
                                const Operand& operand,
                                FlagsUpdate S,
                                Instr op);
  void ConditionalSelect(const Register& rd,
                         const Register& rn,
                         const Register& rm,
                         Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd,
                             const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd,
                             const Register& rn,
                             const Register& rm,
                             const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm,
                               FPDataProcessing2SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm, const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  // NEON instruction emission helpers, grouped by encoding class.
  void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd, const VRegister& vn,
                   const VRegister& vm, Instr op);
  void NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                        const VRegister& vm, NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                      NEON2RegMiscOp vop, double value = 0.0);
  void NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                    NEON2RegMiscOp vop, int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm, int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm, int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                          NEONShiftImmediateOp op, int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                              int shift, NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn,
                               int shift, NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
  void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEONTableOp op);

  Instr LoadStoreStructAddrModeField(const MemOperand& addr);
3393 
  // Label helpers.

  // Return an offset for a label-referencing instruction, typically a branch.
  int LinkAndGetByteOffsetTo(Label* label);

  // This is the same as LinkAndGetByteOffsetTo, but returns an offset
  // suitable for fields that take instruction offsets.
  inline int LinkAndGetInstructionOffsetTo(Label* label);

  static constexpr int kStartOfLabelLinkChain = 0;

  // Verify that a label's link chain is intact.
  void CheckLabelLinkChain(Label const * label);

  // Postpone the generation of the constant pool for the specified number of
  // instructions.
  void BlockConstPoolFor(int instructions);
3411 
  // Set how far from current pc the next constant pool check will be
  // (measured in instructions, stored as a byte pc offset).
  void SetNextConstPoolCheckIn(int instructions) {
    next_constant_pool_check_ = pc_offset() + instructions * kInstrSize;
  }
3416 
3417   // Emit the instruction at pc_.
Emit(Instr instruction)3418   void Emit(Instr instruction) {
3419     STATIC_ASSERT(sizeof(*pc_) == 1);
3420     STATIC_ASSERT(sizeof(instruction) == kInstrSize);
3421     DCHECK((pc_ + sizeof(instruction)) <= (buffer_ + buffer_size_));
3422 
3423     memcpy(pc_, &instruction, sizeof(instruction));
3424     pc_ += sizeof(instruction);
3425     CheckBuffer();
3426   }
3427 
3428   // Emit data inline in the instruction stream.
EmitData(void const * data,unsigned size)3429   void EmitData(void const * data, unsigned size) {
3430     DCHECK_EQ(sizeof(*pc_), 1);
3431     DCHECK((pc_ + size) <= (buffer_ + buffer_size_));
3432 
3433     // TODO(all): Somehow register we have some data here. Then we can
3434     // disassemble it correctly.
3435     memcpy(pc_, data, size);
3436     pc_ += size;
3437     CheckBuffer();
3438   }
3439 
  void GrowBuffer();
  void CheckBufferSpace();
  void CheckBuffer();

  // Pc offset of the next constant pool check.
  int next_constant_pool_check_;

  // Constant pool generation
  // Pools are emitted in the instruction stream. They are emitted when:
  //  * the distance to the first use is above a pre-defined distance or
  //  * the number of entries in the pool is above a pre-defined size or
  //  * code generation is finished
  // If a pool needs to be emitted before code generation is finished a branch
  // over the emitted pool will be inserted.

  // Constants in the pool may be addresses of functions that get relocated;
  // if so, a relocation info entry is associated to the constant pool entry.

  // Repeated checking whether the constant pool should be emitted is rather
  // expensive. By default we only check again once a number of instructions
  // has been generated. That also means that the sizing of the buffers is not
  // an exact science, and that we rely on some slop to not overrun buffers.
  static constexpr int kCheckConstPoolInterval = 128;

  // Distance to first use after which a pool will be emitted. Pool entries
  // are accessed with pc relative load therefore this cannot be more than
  // 1 * MB. Since constant pool emission checks are interval based this value
  // is an approximation.
  static constexpr int kApproxMaxDistToConstPool = 64 * KB;

  // Number of pool entries after which a pool will be emitted. Since constant
  // pool emission checks are interval based this value is an approximation.
  static constexpr int kApproxMaxPoolEntryCount = 512;

  // Emission of the constant pool may be blocked in some code sequences.
  int const_pool_blocked_nesting_;  // Block emission if this is not zero.
  int no_const_pool_before_;  // Block emission before this pc offset.

  // Emission of the veneer pools may be blocked in some code sequences.
  int veneer_pool_blocked_nesting_;  // Block emission if this is not zero.

  // Relocation info generation
  // Each relocation is encoded as a variable size value
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Relocation info records are also used during code generation as temporary
  // containers for constants and code target addresses until they are emitted
  // to the constant pool. These pending relocation info records are temporarily
  // stored in a separate buffer until a constant pool is emitted.
  // If every instruction in a long sequence is accessing the pool, we need one
  // pending relocation entry per instruction.

  // The pending constant pool.
  ConstPool constpool_;

 protected:
  // Code generation
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 128;
3509 
 public:
#ifdef DEBUG
  // Functions used for testing.
  // Size in bytes of the pending constant pool entries (one pointer each).
  int GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.EntryCount() * kPointerSize;
  }

  static constexpr int GetCheckConstPoolIntervalForTesting() {
    return kCheckConstPoolInterval;
  }

  static constexpr int GetApproxMaxDistToConstPoolForTesting() {
    return kApproxMaxDistToConstPool;
  }
#endif
3526 
3527   class FarBranchInfo {
3528    public:
FarBranchInfo(int offset,Label * label)3529     FarBranchInfo(int offset, Label* label)
3530         : pc_offset_(offset), label_(label) {}
3531     // Offset of the branch in the code generation buffer.
3532     int pc_offset_;
3533     // The label branched to.
3534     Label* label_;
3535   };
3536 
 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, allowing to
  // easily check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // Maximum reachable offset of the closest-to-limit unresolved branch.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // This is similar to next_constant_pool_check_ and helps reduce the overhead
  // of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;
3570 
 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // If a veneer is emitted for a branch instruction, that instruction must be
  // removed from the associated label's link chain so that the assembler does
  // not later attempt (likely unsuccessfully) to patch it to branch directly to
  // the label.
  void DeleteUnresolvedBranchInfoForLabel(Label* label);
  // This function deletes the information related to the label by traversing
  // the label chain, and for each PC-relative instruction in the chain checking
  // if pending unresolved information exists. Its complexity is proportional to
  // the length of the label chain.
  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);

  // NOTE(review): presumably resolves deferred heap-object requests once an
  // isolate is available — confirm against the definition in the .cc file.
  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  friend class EnsureSpace;
  friend class ConstPool;
3590 };
3591 
class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options, start, count * kInstrSize + kGap) {
    // Block constant pool emission.
    StartBlockPools();
  }

  // The destructor checks (in debug builds) that exactly `count` instructions
  // were emitted and that no constant pool entries were generated.
  ~PatchingAssembler() {
    // Const pool should still be blocked.
    DCHECK(is_const_pool_blocked());
    EndBlockPools();
    // Verify we have generated the number of instruction we expected.
    DCHECK((pc_offset() + kGap) == buffer_size_);
    // Verify no relocation information has been emitted.
    DCHECK(IsConstPoolEmpty());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);
};
3626 
3627 
3628 class EnsureSpace BASE_EMBEDDED {
3629  public:
EnsureSpace(Assembler * assembler)3630   explicit EnsureSpace(Assembler* assembler) {
3631     assembler->CheckBufferSpace();
3632   }
3633 };
3634 
3635 }  // namespace internal
3636 }  // namespace v8
3637 
3638 #endif  // V8_ARM64_ASSEMBLER_ARM64_H_
3639