// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

// A lightweight X64 Assembler.

#ifndef V8_X64_ASSEMBLER_X64_H_
#define V8_X64_ASSEMBLER_X64_H_

#include <deque>

#include "src/assembler.h"

namespace v8 {
namespace internal {

// Utility functions

#define GENERAL_REGISTERS(V) \
  V(rax)                     \
  V(rcx)                     \
  V(rdx)                     \
  V(rbx)                     \
  V(rsp)                     \
  V(rbp)                     \
  V(rsi)                     \
  V(rdi)                     \
  V(r8)                      \
  V(r9)                      \
  V(r10)                     \
  V(r11)                     \
  V(r12)                     \
  V(r13)                     \
  V(r14)                     \
  V(r15)

#define ALLOCATABLE_GENERAL_REGISTERS(V) \
  V(rax)                                 \
  V(rbx)                                 \
  V(rdx)                                 \
  V(rcx)                                 \
  V(rsi)                                 \
  V(rdi)                                 \
  V(r8)                                  \
  V(r9)                                  \
  V(r11)                                 \
  V(r12)                                 \
  V(r14)                                 \
  V(r15)


// CPU Registers.
//
// 1) We would prefer to use an enum, but enum values are assignment-
// compatible with int, which has caused code-generation bugs.
//
// 2) We would prefer to use a class instead of a struct but we don't like
// the register initialization to depend on the particular initialization
// order (which appears to be different on OS X, Linux, and Windows for the
// installed versions of C++ we tried). Using a struct permits C-style
// "initialization". Also, the Register objects cannot be const as this
// forces initialization stubs in MSVC, making us dependent on initialization
// order.
//
// 3) By not using an enum, we are possibly preventing the compiler from
// doing certain constant folds, which may significantly reduce the
// code generated for some assembly instructions (because they boil down
// to a few constants). If this is a problem, we could change the code
// such that we use an enum in optimized mode, and the struct in debug
// mode. This way we get the compile-time error checking in debug mode
// and best performance in optimized code.
//
struct Register {
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    GENERAL_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kNumRegisters = Code::kAfterLast;

  static Register from_code(int code) {
    DCHECK(code >= 0);
    DCHECK(code < kNumRegisters);
    Register r = {code};
    return r;
  }
  bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }
  bool is(Register reg) const { return reg_code == reg.reg_code; }
  int code() const {
    DCHECK(is_valid());
    return reg_code;
  }
  int bit() const {
    DCHECK(is_valid());
    return 1 << reg_code;
  }

  bool is_byte_register() const { return reg_code <= 3; }
  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const { return reg_code >> 3; }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return reg_code & 0x7; }

  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
  int reg_code;
};


#define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R};
GENERAL_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
const Register no_reg = {Register::kCode_no_reg};
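
// A minimal sketch of how Register::high_bit() and Register::low_bits()
// decompose a register code during encoding. The numbering follows
// GENERAL_REGISTERS, so r9 has code 9 (binary 1001); the exact emit path is
// not shown and is only assumed here for illustration:
//
//   Register reg = r9;                 // reg.code()     == 9
//   int rex_bit  = reg.high_bit();     // 9 >> 3  == 1, feeds a REX extension bit
//   int low      = reg.low_bits();     // 9 & 0x7 == 1, feeds ModR/M, SIB, or opcode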


#ifdef _WIN64
  // Windows calling convention
const Register arg_reg_1 = {Register::kCode_rcx};
const Register arg_reg_2 = {Register::kCode_rdx};
const Register arg_reg_3 = {Register::kCode_r8};
const Register arg_reg_4 = {Register::kCode_r9};
#else
  // AMD64 calling convention
const Register arg_reg_1 = {Register::kCode_rdi};
const Register arg_reg_2 = {Register::kCode_rsi};
const Register arg_reg_3 = {Register::kCode_rdx};
const Register arg_reg_4 = {Register::kCode_rcx};
#endif  // _WIN64


#define DOUBLE_REGISTERS(V) \
  V(xmm0)                   \
  V(xmm1)                   \
  V(xmm2)                   \
  V(xmm3)                   \
  V(xmm4)                   \
  V(xmm5)                   \
  V(xmm6)                   \
  V(xmm7)                   \
  V(xmm8)                   \
  V(xmm9)                   \
  V(xmm10)                  \
  V(xmm11)                  \
  V(xmm12)                  \
  V(xmm13)                  \
  V(xmm14)                  \
  V(xmm15)

#define FLOAT_REGISTERS DOUBLE_REGISTERS

#define ALLOCATABLE_DOUBLE_REGISTERS(V) \
  V(xmm0)                               \
  V(xmm1)                               \
  V(xmm2)                               \
  V(xmm3)                               \
  V(xmm4)                               \
  V(xmm5)                               \
  V(xmm6)                               \
  V(xmm7)                               \
  V(xmm8)                               \
  V(xmm9)                               \
  V(xmm10)                              \
  V(xmm11)                              \
  V(xmm12)                              \
  V(xmm13)                              \
  V(xmm14)

static const bool kSimpleFPAliasing = true;

struct XMMRegister {
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    DOUBLE_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kMaxNumRegisters = Code::kAfterLast;

  static XMMRegister from_code(int code) {
    XMMRegister result = {code};
    return result;
  }

  bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
  bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
  int code() const {
    DCHECK(is_valid());
    return reg_code;
  }

  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const { return reg_code >> 3; }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return reg_code & 0x7; }

  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
  int reg_code;
};

typedef XMMRegister FloatRegister;

typedef XMMRegister DoubleRegister;

typedef XMMRegister Simd128Register;

#define DECLARE_REGISTER(R) \
  const DoubleRegister R = {DoubleRegister::kCode_##R};
DOUBLE_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};

enum Condition {
  // any value < 0 is considered no_condition
  no_condition  = -1,

  overflow      =  0,
  no_overflow   =  1,
  below         =  2,
  above_equal   =  3,
  equal         =  4,
  not_equal     =  5,
  below_equal   =  6,
  above         =  7,
  negative      =  8,
  positive      =  9,
  parity_even   = 10,
  parity_odd    = 11,
  less          = 12,
  greater_equal = 13,
  less_equal    = 14,
  greater       = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always        = 16,
  never         = 17,
  // aliases
  carry         = below,
  not_carry     = above_equal,
  zero          = equal,
  not_zero      = not_equal,
  sign          = negative,
  not_sign      = positive,
  last_condition = greater
};


// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}
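// The XOR above relies on the condition codes coming in complementary pairs
// that differ only in the low bit. Illustrative values taken from the enum:
//
//   NegateCondition(equal) == not_equal    // 4 ^ 1 == 5
//   NegateCondition(below) == above_equal  // 2 ^ 1 == 3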


// Commute a condition such that {a cond b == b cond' a}.
inline Condition CommuteCondition(Condition cc) {
  switch (cc) {
    case below:
      return above;
    case above:
      return below;
    case above_equal:
      return below_equal;
    case below_equal:
      return above_equal;
    case less:
      return greater;
    case greater:
      return less;
    case greater_equal:
      return less_equal;
    case less_equal:
      return greater_equal;
    default:
      return cc;
  }
}
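// Usage sketch (masm and target are assumed to be an Assembler and a Label
// defined elsewhere; register choice is arbitrary):
//
//   masm.cmpq(rbx, rax);                      // flags from rbx - rax
//   masm.j(CommuteCondition(less), &target);  // == j(greater): taken iff rax < rbx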


enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
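// The rounding mode is consumed by the roundss/roundsd wrappers declared
// further down in this header. A minimal sketch (masm and the xmm register
// choice are arbitrary):
//
//   masm.roundsd(xmm0, xmm1, kRoundToZero);  // truncate xmm1 into xmm0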


// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}
  explicit Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit Immediate(Smi* value) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
    value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
  }

 private:
  int32_t value_;
  RelocInfo::Mode rmode_ = RelocInfo::NONE32;

  friend class Assembler;
};


// -----------------------------------------------------------------------------
// Machine instruction Operands

enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,
  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
};


class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // [rip + disp/r]
  explicit Operand(Label* label);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  byte rex_;
  byte buf_[9];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);
  inline void set_disp64(int64_t disp);  // for labels.

  friend class Assembler;
};
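// A few illustrative addressing forms built with the constructors above
// (register choices are arbitrary):
//
//   Operand(rbx, 8)                  // [rbx + 8]
//   Operand(rbx, rcx, times_4, 0)    // [rbx + rcx*4]
//   Operand(rcx, times_8, -16)       // [rcx*8 - 16]
//   Operand(Operand(rbx, 8), 4)      // [rbx + 12]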

#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(neg)                              \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)

// Shift instructions on operands/registers with kPointerSize, kInt32Size and
// kInt64Size.
#define SHIFT_INSTRUCTION_LIST(V)       \
  V(rol, 0x0)                           \
  V(ror, 0x1)                           \
  V(rcl, 0x2)                           \
  V(rcr, 0x3)                           \
  V(shl, 0x4)                           \
  V(shr, 0x5)                           \
  V(sar, 0x7)


class Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static const int kGap = 32;

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
  virtual ~Assembler() { }

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
  // Assembler functions are invoked in between GetCode() calls.
  void GetCode(CodeDesc* desc);
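
  // A minimal lifecycle sketch (buffer size and emitted instructions are
  // arbitrary; passing NULL lets the assembler manage its own buffer):
  //
  //   Assembler masm(isolate, NULL, 256);
  //   masm.movq(rax, arg_reg_1);
  //   masm.ret(0);
  //   CodeDesc desc;
  //   masm.GetCode(&desc);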

  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  static inline Address target_address_at(Address pc, Address constant_pool);
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
  static inline Address target_address_at(Address pc, Code* code) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    return target_address_at(pc, constant_pool);
  }
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Code* code, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    set_target_address_at(isolate, pc, constant_pool, target,
                          icache_flush_mode);
  }

  // Return the code target address at a call site from the return address
  // of that call in the instruction stream.
  static inline Address target_address_from_return_address(Address pc);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Isolate* isolate, Address instruction_payload, Code* code,
      Address target) {
    set_target_address_at(isolate, instruction_payload, code, target);
  }

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Isolate* isolate, Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  static inline RelocInfo::Mode RelocInfoNone() {
    if (kPointerSize == kInt64Size) {
      return RelocInfo::NONE64;
    } else {
      DCHECK(kPointerSize == kInt32Size);
      return RelocInfo::NONE32;
    }
  }

  inline Handle<Object> code_target_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);
  // Number of bytes taken up by the branch target in the code.
  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
  // Distance between the address of the code target in the call instruction
  // and the return address pushed on the stack.
  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
  // The length of call(kScratchRegister).
  static const int kCallScratchRegisterInstructionLength = 3;
  // The length of call(Immediate32).
  static const int kShortCallInstructionLength = 5;
  // The length of movq(kScratchRegister, address).
  static const int kMoveAddressIntoScratchRegisterInstructionLength =
      2 + kPointerSize;
  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
  static const int kCallSequenceLength =
      kMoveAddressIntoScratchRegisterInstructionLength +
      kCallScratchRegisterInstructionLength;

  // The debug break slot must be able to contain an indirect call sequence.
  static const int kDebugBreakSlotLength = kCallSequenceLength;
  // Distance between start of patched debug break slot and the emitted address
  // to jump to.
  static const int kPatchDebugBreakSlotAddressOffset =
      kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;

  // One byte opcode for test eax,0xXXXXXXXX.
  static const byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static const byte kJccShortPrefix = 0x70;
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static const byte kJzShortOpcode = kJccShortPrefix | zero;
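  // Since a short conditional jump is encoded as 0x70 | condition, these
  // constants fold to the familiar one-byte opcodes, e.g. (illustrative only):
  //
  //   kJzShortOpcode  == 0x70 | zero     == 0x74
  //   kJnzShortOpcode == 0x70 | not_zero == 0x75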

  // VEX prefix encodings.
  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };

  // ---------------------------------------------------------------------------
  // Code generation
  //
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  // - Instructions on operands/registers with pointer size use 'p'.

  STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);

#define DECLARE_INSTRUCTION(instruction)                \
  template<class P1>                                    \
  void instruction##p(P1 p1) {                          \
    emit_##instruction(p1, kPointerSize);               \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##l(P1 p1) {                          \
    emit_##instruction(p1, kInt32Size);                 \
  }                                                     \
                                                        \
  template<class P1>                                    \
  void instruction##q(P1 p1) {                          \
    emit_##instruction(p1, kInt64Size);                 \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##p(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kPointerSize);           \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##l(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt32Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2>                          \
  void instruction##q(P1 p1, P2 p2) {                   \
    emit_##instruction(p1, p2, kInt64Size);             \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##p(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kPointerSize);       \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##l(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt32Size);         \
  }                                                     \
                                                        \
  template<class P1, class P2, class P3>                \
  void instruction##q(P1 p1, P2 p2, P3 p3) {            \
    emit_##instruction(p1, p2, p3, kInt64Size);         \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION
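  // The macro above expands each entry of ASSEMBLER_INSTRUCTION_LIST into
  // 'p'/'l'/'q' size variants. A few illustrative calls (operand choices are
  // arbitrary):
  //
  //   masm.addq(rax, Immediate(1));     // 64-bit add
  //   masm.movl(rcx, Operand(rbx, 0));  // 32-bit load from [rbx]
  //   masm.testq(rdx, rdx);             // 64-bit test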

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();

  // Stack
  void pushfq();
  void popfq();

  void pushq(Immediate value);
  // Push a 32-bit integer and guarantee that it is actually pushed as a
  // 32-bit value; the normal push will optimize the 8-bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(const Operand& src);

  void popq(Register dst);
  void popq(const Operand& dst);

  void enter(Immediate size);
  void leave();

  // Moves
  void movb(Register dst, const Operand& src);
  void movb(Register dst, Immediate imm);
  void movb(const Operand& dst, Register src);
  void movb(const Operand& dst, Immediate imm);

  // 16-bit (word) moves between registers, memory, and immediates.
  // Only the low 16 bits of the register operands are used.
  void movw(Register dst, const Operand& src);
  void movw(const Operand& dst, Register src);
  void movw(const Operand& dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(const Operand& dst, Label* src);

  // Loads a pointer into a register with a relocation mode.
  void movp(Register dst, void* ptr, RelocInfo::Mode rmode);

  // Loads a 64-bit immediate into a register.
  void movq(Register dst, int64_t value,
            RelocInfo::Mode rmode = RelocInfo::NONE64);
  void movq(Register dst, uint64_t value,
            RelocInfo::Mode rmode = RelocInfo::NONE64);

  void movsxbl(Register dst, Register src);
  void movsxbl(Register dst, const Operand& src);
  void movsxbq(Register dst, const Operand& src);
  void movsxwl(Register dst, Register src);
  void movsxwl(Register dst, const Operand& src);
  void movsxwq(Register dst, const Operand& src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, const Operand& src);

  // Repeated moves.

  void repmovsb();
  void repmovsw();
  void repmovsp() { emit_repmovs(kPointerSize); }
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(void* ptr, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, const Operand& src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, const Operand& src);

  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op_8(0x3A, dst, src);
  }

  void cmpb(const Operand& dst, Register src) {
    arithmetic_op_8(0x38, src, dst);
  }

  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }

  void testb(Register reg, const Operand& op) { testb(op, reg); }

  void testw(Register reg, const Operand& op) { testw(op, reg); }

  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }

  void decb(Register dst);
  void decb(const Operand& dst);

  // Lock prefix.
  void lock();

  void xchgb(Register reg, const Operand& op);
  void xchgw(Register reg, const Operand& op);

  void cmpxchgb(const Operand& dst, Register src);
  void cmpxchgw(const Operand& dst, Register src);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply eax by src, put the result in edx:eax.
  void mull(Register src);
  void mull(const Operand& src);
  // Multiply rax by src, put the result in rdx:rax.
  void mulq(Register src);

#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                       \
  void instruction##p(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Register dst, Immediate imm8) {                         \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kPointerSize);                                  \
  }                                                                           \
                                                                              \
  void instruction##l(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt32Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##q(Operand dst, Immediate imm8) {                          \
    shift(dst, imm8, subcode, kInt64Size);                                    \
  }                                                                           \
                                                                              \
  void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
                                                                              \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); }   \
                                                                              \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); }   \
                                                                              \
  void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); }  \
                                                                              \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }    \
                                                                              \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
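  // The macro above generates both immediate-count and CL-count forms for
  // every entry in SHIFT_INSTRUCTION_LIST. Illustrative calls (operands are
  // arbitrary):
  //
  //   masm.shlq(rax, Immediate(3));   // rax <<= 3
  //   masm.sarl_cl(rdx);              // arithmetic right shift of edx by cl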

  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  void store_rax(void* dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(const Operand& op, Immediate mask);
  void testb(const Operand& op, Register reg);

  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(const Operand& op, Immediate mask);
  void testw(const Operand& op, Register reg);

  // Bit operations.
  void bt(const Operand& dst, Register src);
  void bts(const Operand& dst, Register src);
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, const Operand& src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, const Operand& src);
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, const Operand& src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, const Operand& src);

  // Miscellaneous
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
  void ud2();
  void setcc(Condition cc, Register reg);

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  void call(Handle<Code> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
            TypeFeedbackId ast_id = TypeFeedbackId::None());

  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Address entry, RelocInfo::Mode rmode);
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(const Operand& src);

  // Conditional jumps
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);

  // Floating-point operations
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
  void fstp(int index);

  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  void fist_s(const Operand& adr);

  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  void fisttp_s(const Operand& adr);
  void fisttp_d(const Operand& adr);

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();

  // SSE instructions
  void addss(XMMRegister dst, XMMRegister src);
  void addss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src);
  void subss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src);
  void mulss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src);
  void divss(XMMRegister dst, const Operand& src);

  void maxss(XMMRegister dst, XMMRegister src);
  void maxss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src);
  void minss(XMMRegister dst, const Operand& src);

  void sqrtss(XMMRegister dst, XMMRegister src);
  void sqrtss(XMMRegister dst, const Operand& src);

  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, const Operand& src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, const Operand& src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  void andps(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src);
  void orps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, const Operand& src);

  void addps(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, const Operand& src);

  void movmskps(Register dst, XMMRegister src);

  // SSE2 instructions
  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, const Operand& src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

  void movapd(XMMRegister dst, XMMRegister src);

  void psllq(XMMRegister reg, byte imm8);
  void psrlq(XMMRegister reg, byte imm8);
  void pslld(XMMRegister reg, byte imm8);
  void psrld(XMMRegister reg, byte imm8);

  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, const Operand& src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, const Operand& src);

  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, const Operand& src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);


  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, const Operand& src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  void addsd(XMMRegister dst, XMMRegister src);
  void addsd(XMMRegister dst, const Operand& src);
  void subsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, const Operand& src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, const Operand& src);

  void maxsd(XMMRegister dst, XMMRegister src);
  void maxsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src);
  void minsd(XMMRegister dst, const Operand& src);

  void andpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, const Operand& src);

  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);
  void cmpltsd(XMMRegister dst, XMMRegister src);
  void pcmpeqd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, const Operand& src);
  void punpckhdq(XMMRegister dst, XMMRegister src);

  // SSE 4.1 instructions
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  void pextrd(Register dst, XMMRegister src, int8_t imm8);
  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpeqps(XMMRegister dst, XMMRegister src);
  void cmpltps(XMMRegister dst, XMMRegister src);
  void cmpleps(XMMRegister dst, XMMRegister src);
  void cmpneqps(XMMRegister dst, XMMRegister src);
  void cmpnltps(XMMRegister dst, XMMRegister src);
  void cmpnleps(XMMRegister dst, XMMRegister src);

  void minps(XMMRegister dst, XMMRegister src);
  void minps(XMMRegister dst, const Operand& src);
  void maxps(XMMRegister dst, XMMRegister src);
  void maxps(XMMRegister dst, const Operand& src);
  void rcpps(XMMRegister dst, XMMRegister src);
  void rcpps(XMMRegister dst, const Operand& src);
  void rsqrtps(XMMRegister dst, XMMRegister src);
  void rsqrtps(XMMRegister dst, const Operand& src);
  void sqrtps(XMMRegister dst, XMMRegister src);
  void sqrtps(XMMRegister dst, const Operand& src);
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
  void paddd(XMMRegister dst, XMMRegister src);
  void paddd(XMMRegister dst, const Operand& src);
  void psubd(XMMRegister dst, XMMRegister src);
  void psubd(XMMRegister dst, const Operand& src);
  void pmulld(XMMRegister dst, XMMRegister src);
  void pmulld(XMMRegister dst, const Operand& src);
  void pmuludq(XMMRegister dst, XMMRegister src);
  void pmuludq(XMMRegister dst, const Operand& src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void cvtps2dq(XMMRegister dst, XMMRegister src);
  void cvtps2dq(XMMRegister dst, const Operand& src);
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void cvtdq2ps(XMMRegister dst, const Operand& src);

  // AVX instructions
vfmadd132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1205   void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1206     vfmasd(0x99, dst, src1, src2);
1207   }
vfmadd213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1208   void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1209     vfmasd(0xa9, dst, src1, src2);
1210   }
vfmadd231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1211   void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1212     vfmasd(0xb9, dst, src1, src2);
1213   }
vfmadd132sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1214   void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1215     vfmasd(0x99, dst, src1, src2);
1216   }
vfmadd213sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1217   void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1218     vfmasd(0xa9, dst, src1, src2);
1219   }
vfmadd231sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1220   void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1221     vfmasd(0xb9, dst, src1, src2);
1222   }
vfmsub132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1223   void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1224     vfmasd(0x9b, dst, src1, src2);
1225   }
vfmsub213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1226   void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1227     vfmasd(0xab, dst, src1, src2);
1228   }
vfmsub231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1229   void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1230     vfmasd(0xbb, dst, src1, src2);
1231   }
vfmsub132sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1232   void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1233     vfmasd(0x9b, dst, src1, src2);
1234   }
vfmsub213sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1235   void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1236     vfmasd(0xab, dst, src1, src2);
1237   }
vfmsub231sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1238   void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1239     vfmasd(0xbb, dst, src1, src2);
1240   }
vfnmadd132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1241   void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1242     vfmasd(0x9d, dst, src1, src2);
1243   }
vfnmadd213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1244   void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1245     vfmasd(0xad, dst, src1, src2);
1246   }
vfnmadd231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1247   void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1248     vfmasd(0xbd, dst, src1, src2);
1249   }
vfnmadd132sd(XMMRegister dst,XMMRegister src1,const Operand & src2)1250   void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1251     vfmasd(0x9d, dst, src1, src2);
1252   }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

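  // Note on the FMA mnemonics above: the 132/213/231 suffix gives the operand
  // order of the multiply-add, following the standard Intel FMA3 convention,
  // e.g. vfmadd231sd computes dst = src1 * src2 + dst. Illustrative sketch
  // only ("masm" is a hypothetical Assembler instance, not part of this file):
  //
  //   masm.vfmadd231sd(xmm0, xmm1, xmm2);   // xmm0 = xmm1 * xmm2 + xmm0
  //   masm.vfnmsub132sd(xmm3, xmm4, xmm5);  // xmm3 = -(xmm3 * xmm5) - xmm4
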
  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, const Operand& src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, const Operand& src);
  void vmovq(Register dst, XMMRegister src);

  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, const Operand& src) {
    vsd(0x10, dst, xmm0, src);
  }
  void vmovsd(const Operand& dst, XMMRegister src) {
    vsd(0x11, src, xmm0, dst);
  }

#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
  AVX_P_3(instr, opcode)

#define AVX_S_3(instr, opcode)  \
  AVX_3(instr##ss, opcode, vss) \
  AVX_3(instr##sd, opcode, vsd)

#define AVX_P_3(instr, opcode)  \
  AVX_3(instr##ps, opcode, vps) \
  AVX_3(instr##pd, opcode, vpd)

#define AVX_3(instr, opcode, impl)                                     \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_SP_3(vsqrt, 0x51);
  AVX_SP_3(vadd, 0x58);
  AVX_SP_3(vsub, 0x5c);
  AVX_SP_3(vmul, 0x59);
  AVX_SP_3(vdiv, 0x5e);
  AVX_SP_3(vmin, 0x5d);
  AVX_SP_3(vmax, 0x5f);
  AVX_P_3(vand, 0x54);
  AVX_P_3(vor, 0x56);
  AVX_P_3(vxor, 0x57);
  AVX_3(vpcmpeqd, 0x76, vpd);
  AVX_3(vcvtsd2ss, 0x5a, vsd);

#undef AVX_3
#undef AVX_S_3
#undef AVX_P_3
#undef AVX_SP_3
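
  // For reference, AVX_SP_3(vadd, 0x58) above expands to register and
  // memory-operand overloads of vaddss, vaddsd, vaddps and vaddpd; one of the
  // eight generated emitters looks like this (expansion shown for
  // illustration only):
  //
  //   void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  //     vsd(0x58, dst, src1, src2);
  //   }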

  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {2};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    XMMRegister iop = {6};
    vpd(0x73, iop, dst, src);
    emit(imm8);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = {src2.code()};
    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
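
  // The integer<->FP conversion helpers above and below follow the same
  // naming scheme as the prefixes they pass down: kW0 (VEX.W = 0) selects a
  // 32-bit integer operand for the *lsi*/*si variants, kW1 (VEX.W = 1) a
  // 64-bit operand for the *qsi*/*siq variants, and kF2/kF3 pick the
  // double/single scalar data type.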
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
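
  // The immediate byte appended by vroundss/vroundsd carries the rounding
  // mode in its low bits; OR-ing in 0x8 sets the "suppress precision
  // exception" bit. Illustrative use only (assumes a RoundingMode enumerator
  // such as kRoundDown and an Assembler instance "masm"):
  //
  //   masm.vroundsd(xmm1, xmm2, xmm2, kRoundDown);  // xmm1 = floor(xmm2)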

  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
           SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
           SIMDPrefix pp, LeadingOpcode m, VexW w);

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = {dst.code()};
    vpd(0x50, idst, xmm0, src);
  }

  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
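
  // vss/vsd/vps/vpd are the low-level VEX emitters used throughout this
  // class; they differ mainly in the implied SIMD prefix of the encoding
  // (F3 for scalar-single, F2 for scalar-double, none for packed-single,
  // 66 for packed-double), matching the standard SSE/AVX prefix convention.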

  // BMI instructions.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsiq(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, Register src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsil(Register dst, const Operand& src) {
    Register ireg = {3};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, Register src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskq(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, Register src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsmskl(Register dst, const Operand& src) {
    Register ireg = {2};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, Register src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrq(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1q(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, Register src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
  void blsrl(Register dst, const Operand& src) {
    Register ireg = {1};
    bmi1l(0xf3, ireg, dst, src);
  }
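
  // The ireg values above are the BMI1 opcode extensions (/3 = BLSI,
  // /2 = BLSMSK, /1 = BLSR), i.e. the standard bit-manipulation idioms:
  //   blsi:   dst = src & -src         (isolate lowest set bit)
  //   blsmsk: dst = src ^ (src - 1)    (mask up to and including lowest set bit)
  //   blsr:   dst = src & (src - 1)    (reset lowest set bit)
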
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, const Operand& src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, const Operand& src);

  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, const Operand& src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, const Operand& src);

  void popcntq(Register dst, Register src);
  void popcntq(Register dst, const Operand& src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, const Operand& src);

  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);
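
  // BMI2 shifts (sarx/shlx/shrx) take the shift count in a register and do
  // not modify the flags. Note the swapped argument order in the emitters
  // above: the count register is the one encoded in VEX.vvvv. Illustrative
  // sketch only ("masm" is a hypothetical Assembler instance):
  //
  //   masm.shlxq(rax, rbx, rcx);  // rax = rbx << (rcx & 63), flags unchanged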

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }
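
  // Typical use is to bind a label at some point in the instruction stream
  // and measure how many bytes have been emitted since. Sketch only, assuming
  // the usual Assembler::bind()/Label API and an instance "masm":
  //
  //   Label start;
  //   masm.bind(&start);
  //   /* ... emit instructions ... */
  //   int bytes = masm.SizeOfCodeGeneratedSince(&start);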

  // Mark generator continuation.
  void RecordGeneratorContinuation();

  // Mark address of a debug break slot.
  void RecordDebugBreakSlot(RelocInfo::Mode mode);

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(const int reason, int raw_position, int id);

  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }

  // Writes a single word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  void dp(uintptr_t data) { dq(data); }
  void dq(Label* label);
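
  // Sketch of an inline jump table built with dq(Label*); each entry records
  // the code address of a (possibly not-yet-bound) label. Illustrative only,
  // assuming labels "case0"/"case1" and an Assembler instance "masm":
  //
  //   masm.dq(&case0);
  //   masm.dq(&case1);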

  AssemblerPositionsRecorder* positions_recorder() {
    return &positions_recorder_;
  }

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512*MB;

  byte byte_at(int pos) { return buffer_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }

 protected:
  // Call near indirect
  void call(const Operand& operand);

 private:
  byte* addr_at(int pos) { return buffer_ + pos; }
  uint32_t long_at(int pos) {
    return *reinterpret_cast<uint32_t*>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
  }

  // code emission
  void GrowBuffer();

  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitp(void* x, RelocInfo::Mode rmode);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_code_target(Handle<Code> target,
                               RelocInfo::Mode rmode,
                               TypeFeedbackId ast_id = TypeFeedbackId::None());
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  void emit(Immediate x) {
    if (!RelocInfo::IsNone(x.rmode_)) {
      RecordRelocInfo(x.rmode_);
    }
    emitl(x.value_);
  }

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
  inline void emit_rex_64(Register reg, const Operand& op);
  inline void emit_rex_64(XMMRegister reg, const Operand& op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }
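
  // REX prefix layout, for reference: 0100 W R X B. 0x48 is the bare REX.W
  // prefix (W = 1, R = X = B = 0); the emit_rex_64 overloads above fill in
  // R/X/B from the high bits of the register codes involved.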

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(const Operand& op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);

  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK(size == kInt32Size);
    }
  }

  template<class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template<class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK(size == kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }

  // Emit vex prefix
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
                              LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
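
  // VEX prefix layout, for reference: the two-byte form is C5 followed by one
  // byte packing ~R, ~vvvv, L and pp; the three-byte form is C4 followed by a
  // byte packing ~R, ~X, ~B and the leading-opcode map (0F/0F38/0F3A), then a
  // byte packing W, ~vvvv, L and pp. Which form a given instruction gets is
  // decided in the corresponding inline implementations, not in this header.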

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, const Operand& adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, const Operand& adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }
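
  // Worked example of the register-register ModR/M encoding above: with
  // mod = 11, reg = rcx (code 1) and rm = rdx (code 2), the emitted byte is
  // 0xC0 | (1 << 3) | 2 = 0xCA.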

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(Register reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);

  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode,
                     Register reg,
                     const Operand& rm_reg,
                     int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode,
                                 Register dst,
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
                                 const Operand& dst,
                                 Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode,
                               Register dst,
                               Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode,
                               const Operand& dst,
                               Immediate src,
                               int size);

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl % 64 bits (cl % 32 for 32-bit operands).
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);
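
  // The shift subcode is the /digit opcode extension of the x64 shift group:
  // /4 = SHL, /5 = SHR, /7 = SAR (with /0 and /1 being ROL and ROR).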

  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetic operations.
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }
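
  // Worked encoding example for the register form above:
  // emit_add(rax, rbx, kInt64Size) goes through arithmetic_op(0x03, ...) and
  // produces REX.W 03 /r, i.e. the bytes 48 03 c3 for "add rax, rbx".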

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with dst.  If equal, set ZF and write src into
  // dst.  Otherwise clear ZF and write dst into {al,ax,eax,rax}.  This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(const Operand& dst, Register src, int size);

  void emit_dec(Register dst, int size);
  void emit_dec(const Operand& dst, int size);

  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(const Operand& src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, const Operand& src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, const Operand& src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(const Operand& dst, int size);

  void emit_lea(Register dst, const Operand& src, int size);

  void emit_mov(Register dst, const Operand& src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(const Operand& dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(const Operand& dst, Immediate value, int size);

  void emit_movzxb(Register dst, const Operand& src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, const Operand& src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(const Operand& dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(const Operand& dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, const Operand& src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(const Operand& dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  void emit_test(Register reg, const Operand& op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32-bit operations zero the top 32 bits of 64-bit registers, so there
      // is no need to make this a 64-bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }
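
  // The self-xor special case above yields the usual zero idiom: for example,
  // emit_xor(rax, rax, kInt64Size) is emitted as the 32-bit form 33 c0
  // ("xor eax, eax"), which clears all of rax in two bytes instead of three.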

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions are similar.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
             const Operand& rm);

  friend class CodePatcher;
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  List< Handle<Code> > code_targets_;

  AssemblerPositionsRecorder positions_recorder_;
  friend class AssemblerPositionsRecorder;
};


// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace BASE_EMBEDDED {
 public:
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
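
// A sketch of how EnsureSpace is typically used inside an Assembler emitter
// (illustrative only; "some_instruction" is a hypothetical name and the real
// emitters live in the corresponding .cc file):
//
//   void Assembler::some_instruction(Register dst) {
//     EnsureSpace ensure_space(this);  // Grow the buffer if within kGap bytes.
//     emit_rex_64(dst);
//     /* ... emit opcode and operands ... */
//   }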

}  // namespace internal
}  // namespace v8

#endif  // V8_X64_ASSEMBLER_X64_H_