// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

// A lightweight X64 Assembler.

#ifndef V8_CODEGEN_X64_ASSEMBLER_X64_H_
#define V8_CODEGEN_X64_ASSEMBLER_X64_H_

#include <deque>
#include <map>
#include <memory>
#include <vector>

#include "src/base/export-template.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/label.h"
#include "src/codegen/x64/constants-x64.h"
#include "src/codegen/x64/fma-instr.h"
#include "src/codegen/x64/register-x64.h"
#include "src/codegen/x64/sse-instr.h"
#include "src/objects/smi.h"
#if defined(V8_OS_WIN_X64)
#include "src/diagnostics/unwinding-info-win64.h"
#endif

namespace v8 {
namespace internal {

class SafepointTableBuilder;

// Utility functions

enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always = 16,
  never = 17,
  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,
  last_condition = greater
};

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}
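
// For example, NegateCondition(below) == above_equal (2 ^ 1 == 3) and
// NegateCondition(zero) == not_zero: the x64 condition codes come in
// even/odd pairs whose encodings differ only in the lowest bit.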

enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
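
// These values are the 2-bit rounding-control field of the imm8 taken by the
// SSE4.1 round{ss,sd,ps,pd} instructions declared below, so e.g.
// roundsd(dst, src, kRoundToZero) rounds toward zero (truncates).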

// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate {
 public:
  explicit constexpr Immediate(int32_t value) : value_(value) {}
  explicit constexpr Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit Immediate(Smi value)
      : value_(static_cast<int32_t>(static_cast<intptr_t>(value.ptr()))) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
  }

  int32_t value() const { return value_; }
  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  const int32_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;

  friend class Assembler;
};
ASSERT_TRIVIALLY_COPYABLE(Immediate);
static_assert(sizeof(Immediate) <= kSystemPointerSize,
              "Immediate must be small enough to pass it by value");
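
// For example, Immediate(0x10) wraps a plain 32-bit immediate operand;
// 64-bit immediates go through Immediate64 below, as used by
// movq(Register, int64_t).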

class Immediate64 {
 public:
  explicit constexpr Immediate64(int64_t value) : value_(value) {}
  explicit constexpr Immediate64(int64_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit constexpr Immediate64(Address value, RelocInfo::Mode rmode)
      : value_(static_cast<int64_t>(value)), rmode_(rmode) {}

 private:
  const int64_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;

  friend class Assembler;
};

// -----------------------------------------------------------------------------
// Machine instruction Operands

enum ScaleFactor : int8_t {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  times_half_system_pointer_size = times_4,
  times_system_pointer_size = times_8,
  times_tagged_size = (kTaggedSize == 8) ? times_8 : times_4,
};
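
// Each enumerator is the 2-bit SIB scale field, i.e. the left shift applied
// to the index register: times_4 == 2 encodes index << 2. For example,
// Operand(rax, rbx, times_8, 0) below addresses [rax + rbx*8].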

class V8_EXPORT_PRIVATE Operand {
 public:
  struct Data {
    byte rex = 0;
    byte buf[9];
    byte len = 1;   // number of bytes of buf in use.
    int8_t addend;  // for rip + offset + addend.
  };

  // [base + disp/r]
  V8_INLINE Operand(Register base, int32_t disp) {
    if (base == rsp || base == r12) {
      // SIB byte is needed to encode (rsp + offset) or (r12 + offset).
      set_sib(times_1, rsp, base);
    }

    if (disp == 0 && base != rbp && base != r13) {
      set_modrm(0, base);
    } else if (is_int8(disp)) {
      set_modrm(1, base);
      set_disp8(disp);
    } else {
      set_modrm(2, base);
      set_disp32(disp);
    }
  }

  // [base + index*scale + disp/r]
  V8_INLINE Operand(Register base, Register index, ScaleFactor scale,
                    int32_t disp) {
    DCHECK(index != rsp);
    set_sib(scale, index, base);
    if (disp == 0 && base != rbp && base != r13) {
      // This call to set_modrm doesn't overwrite the REX.B (or REX.X) bits
      // possibly set by set_sib.
      set_modrm(0, rsp);
    } else if (is_int8(disp)) {
      set_modrm(1, rsp);
      set_disp8(disp);
    } else {
      set_modrm(2, rsp);
      set_disp32(disp);
    }
  }

  // [index*scale + disp/r]
  V8_INLINE Operand(Register index, ScaleFactor scale, int32_t disp) {
    DCHECK(index != rsp);
    set_modrm(0, rsp);
    set_sib(scale, index, rbp);
    set_disp32(disp);
  }

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(Operand base, int32_t offset);

  // [rip + disp/r]
  V8_INLINE explicit Operand(Label* label, int addend = 0) {
    data_.addend = addend;
    DCHECK_NOT_NULL(label);
    DCHECK(addend == 0 || (is_int8(addend) && label->is_bound()));
    set_modrm(0, rbp);
    set_disp64(reinterpret_cast<intptr_t>(label));
  }

  Operand(const Operand&) V8_NOEXCEPT = default;
  Operand& operator=(const Operand&) V8_NOEXCEPT = default;

  const Data& data() const { return data_; }

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

 private:
  V8_INLINE void set_modrm(int mod, Register rm_reg) {
    DCHECK(is_uint2(mod));
    data_.buf[0] = mod << 6 | rm_reg.low_bits();
    // Set REX.B to the high bit of rm.code().
    data_.rex |= rm_reg.high_bit();
  }

  V8_INLINE void set_sib(ScaleFactor scale, Register index, Register base) {
    DCHECK_EQ(data_.len, 1);
    DCHECK(is_uint2(scale));
    // Use SIB with no index register only for base rsp or r12. Otherwise we
    // would skip the SIB byte entirely.
    DCHECK(index != rsp || base == rsp || base == r12);
    data_.buf[1] = (scale << 6) | (index.low_bits() << 3) | base.low_bits();
    data_.rex |= index.high_bit() << 1 | base.high_bit();
    data_.len = 2;
  }

  V8_INLINE void set_disp8(int disp) {
    DCHECK(is_int8(disp));
    DCHECK(data_.len == 1 || data_.len == 2);
    int8_t* p = reinterpret_cast<int8_t*>(&data_.buf[data_.len]);
    *p = disp;
    data_.len += sizeof(int8_t);
  }

  V8_INLINE void set_disp32(int disp) {
    DCHECK(data_.len == 1 || data_.len == 2);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(int32_t);
  }

  V8_INLINE void set_disp64(int64_t disp) {
    DCHECK_EQ(1, data_.len);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(disp);
  }

  Data data_;
};
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");
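
// Worked example of the encoding above: Operand(rbx, rcx, times_4, 0x10),
// i.e. [rbx + rcx*4 + 0x10], yields ModRM 0x44 (mod=01, rm=100, so a SIB
// byte follows), SIB 0x8B (scale=10, index=rcx, base=rbx) and the 8-bit
// displacement 0x10; the ModRM reg field is filled in by the instruction.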

#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)

// Shift instructions on operands/registers with kInt32Size and kInt64Size.
#define SHIFT_INSTRUCTION_LIST(V) \
  V(rol, 0x0)                     \
  V(ror, 0x1)                     \
  V(rcl, 0x2)                     \
  V(rcr, 0x3)                     \
  V(shl, 0x4)                     \
  V(shr, 0x5)                     \
  V(sar, 0x7)

// Partial Constant Pool
// Unlike a complete constant pool (as used on ARM), a partial constant pool
// only takes effect for shareable constants, in order to reduce code size.
// A partial constant pool does not emit constant pool entries at the end of
// each code object. Instead, it keeps the first shareable constant inlined
// in the instructions and uses rip-relative memory loads for the same
// constant in subsequent instructions. These rip-relative loads target the
// position of the first inlined constant. For example:
//
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//
// turns into
//
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//  REX.W movq r10,[rip+0xffffff96] ; 7 bytes
//  …

class ConstPool {
 public:
  explicit ConstPool(Assembler* assm) : assm_(assm) {}
  // Returns true when the partial constant pool is valid for this entry.
  bool TryRecordEntry(intptr_t data, RelocInfo::Mode mode);
  bool IsEmpty() const { return entries_.empty(); }

  void PatchEntries();
  // Discard any pending pool entries.
  void Clear();

 private:
  // Adds a shared entry to entries_. Returns true if the entry has been
  // recorded before (i.e. this is not its first occurrence), false otherwise.
  bool AddSharedEntry(uint64_t data, int offset);

  // Check if the instruction is a rip-relative move.
  bool IsMoveRipRelative(Address instr);

  Assembler* assm_;

  // Values and pc offsets of entries.
  using EntryMap = std::multimap<uint64_t, int>;
  EntryMap entries_;

  // Number of bytes taken up by the displacement of rip-relative addressing.
  static constexpr int kRipRelativeDispSize = 4;  // 32-bit displacement.
  // Distance between the address of the displacement in the rip-relative move
  // instruction and the head address of the instruction.
  static constexpr int kMoveRipRelativeDispOffset =
      3;  // REX Opcode ModRM Displacement
  // Distance between the address of the imm64 in the 'movq reg, imm64'
  // instruction and the head address of the instruction.
  static constexpr int kMoveImm64Offset = 2;  // REX Opcode imm64
  // A mask for rip-relative move instruction.
  static constexpr uint32_t kMoveRipRelativeMask = 0x00C7FFFB;
  // The bits for a rip-relative move instruction after mask.
  static constexpr uint32_t kMoveRipRelativeInstr = 0x00058B48;
};

class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static constexpr int kGap = 32;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is nullptr, the assembler allocates and grows its
  // own buffer. Otherwise it takes ownership of the provided buffer.
  explicit Assembler(const AssemblerOptions&,
                     std::unique_ptr<AssemblerBuffer> = {});
  ~Assembler() override = default;

  // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
  static constexpr int kNoHandlerTable = 0;
  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
  void GetCode(Isolate* isolate, CodeDesc* desc,
               SafepointTableBuilder* safepoint_table_builder,
               int handler_table_offset);

  // Convenience wrapper for code without safepoint or handler tables.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }
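
  // Minimal usage sketch (illustrative; the options, isolate and emitted
  // instructions are placeholders):
  //   Assembler masm(AssemblerOptions{});
  //   masm.movq(rax, int64_t{42});
  //   masm.ret(0);
  //   CodeDesc desc;
  //   masm.GetCode(isolate, &desc);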

  void FinalizeJumpOptimizationInfo();

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  // The isolate argument is unused (and may be nullptr) when skipping flushing.
  static inline Address target_address_at(Address pc, Address constant_pool);
  static inline void set_target_address_at(
      Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
  static inline int32_t relative_target_offset(Address target, Address pc);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code code, Address target);

  // Get the size of the special target encoded at 'instruction_payload'.
  inline static int deserialization_special_target_size(
      Address instruction_payload);

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  inline Handle<CodeT> code_target_object_handle_at(Address pc);
  inline Handle<HeapObject> compressed_embedded_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);

  // Number of bytes taken up by the branch target in the code.
  static constexpr int kSpecialTargetSize = 4;  // 32-bit displacement.

  // One byte opcode for test eax,0xXXXXXXXX.
  static constexpr byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static constexpr byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static constexpr byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static constexpr byte kJccShortPrefix = 0x70;
  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
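
  // The short Jcc opcodes are 0x70 plus the condition code, e.g.
  // kJzShortOpcode == 0x70 | 4 == 0x74 (short JZ/JE) and
  // kJcShortOpcode == 0x70 | 2 == 0x72 (short JC/JB).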

  // VEX prefix encodings.
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
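
  // These correspond to VEX prefix fields: SIMDPrefix is the 2-bit pp field
  // (the implied 66/F3/F2 legacy prefix), VectorLength is the L bit (128- vs
  // 256-bit operation), VexW is the W bit, and LeadingOpcode is the mmmmm
  // field selecting the 0F, 0F 38 or 0F 3A opcode map.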

  // ---------------------------------------------------------------------------
  // Code generation
  //
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  // - Instructions on operands/registers with pointer size use 'p'.

#define DECLARE_INSTRUCTION(instruction)    \
  template <typename... Ps>                 \
  void instruction##_tagged(Ps... ps) {     \
    emit_##instruction(ps..., kTaggedSize); \
  }                                         \
                                            \
  template <typename... Ps>                 \
  void instruction##l(Ps... ps) {           \
    emit_##instruction(ps..., kInt32Size);  \
  }                                         \
                                            \
  template <typename... Ps>                 \
  void instruction##q(Ps... ps) {           \
    emit_##instruction(ps..., kInt64Size);  \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION
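
  // For example, the V(mov) entry in ASSEMBLER_INSTRUCTION_LIST above
  // declares movl and movq, so movl(rax, rbx) forwards to
  // emit_mov(rax, rbx, kInt32Size) and assembles a 32-bit move, while
  // movq(rax, rbx) assembles the 64-bit form.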

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal as a jump target for the platform.
  void CodeTargetAlign();
  void LoopHeaderAlign();

  // Stack
  void pushfq();
  void popfq();

  void pushq(Immediate value);
  // Push a 32-bit integer, and guarantee that it is actually pushed as a
  // 32-bit value; the normal push will optimize the 8-bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(Operand src);

  void popq(Register dst);
  void popq(Operand dst);

  void incsspq(Register number_of_words);

  void leave();

  // Moves
  void movb(Register dst, Operand src);
  void movb(Register dst, Immediate imm);
  void movb(Operand dst, Register src);
  void movb(Operand dst, Immediate imm);

  // 16-bit (word) moves between registers, memory, and immediates; only the
  // low 16 bits of a register are read or written.
  void movw(Register dst, Operand src);
  void movw(Operand dst, Register src);
  void movw(Operand dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(Operand dst, Label* src);

  // Load a heap number into a register.
  // The heap number will not be allocated and embedded into the code right
  // away. Instead, we emit the load of a dummy object. Later, when calling
  // Assembler::GetCode, the heap number will be allocated and the code will
  // be patched by replacing the dummy with the actual object. The RelocInfo
  // for the embedded object is already recorded correctly when emitting the
  // dummy move.
  void movq_heap_number(Register dst, double value);

  void movq_string(Register dst, const StringConstantBase* str);

  // Loads a 64-bit immediate into a register, potentially using the constant
  // pool.
  void movq(Register dst, int64_t value) { movq(dst, Immediate64(value)); }
  void movq(Register dst, uint64_t value) {
    movq(dst, Immediate64(static_cast<int64_t>(value)));
  }

  // Loads a 64-bit immediate into a register without using the constant pool.
  void movq_imm64(Register dst, int64_t value);

  void movsxbl(Register dst, Register src);
  void movsxbl(Register dst, Operand src);
  void movsxbq(Register dst, Register src);
  void movsxbq(Register dst, Operand src);
  void movsxwl(Register dst, Register src);
  void movsxwl(Register dst, Operand src);
  void movsxwq(Register dst, Register src);
  void movsxwq(Register dst, Operand src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, Operand src);

  // Repeated moves.
  void repmovsb();
  void repmovsw();
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }
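
  // Usage sketch for the repeated moves (register names other than
  // rsi/rdi/rcx are illustrative placeholders):
  //   movq(rsi, source_reg);       // rsi: source address
  //   movq(rdi, destination_reg);  // rdi: destination address
  //   movq(rcx, count_reg);        // rcx: number of elements to copy
  //   repmovsq();                  // copies rcx quadwords from [rsi] to [rdi]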

  // Repeated store of doublewords (fill RCX doublewords at [RDI] with EAX).
  void repstosl();
  // Repeated store of quadwords (fill RCX quadwords at [RDI] with RAX).
  void repstosq();

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(Address value, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Operand src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Operand src);

  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Register dst, Operand src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Operand dst, Register src) { arithmetic_op_8(0x38, src, dst); }

  void cmpb(Operand dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpw(Operand dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Operand src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Register dst, Register src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Operand dst, Register src) { arithmetic_op_16(0x39, src, dst); }

  void testb(Register reg, Operand op) { testb(op, reg); }

  void testw(Register reg, Operand op) { testw(op, reg); }

  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }

  void decb(Register dst);
  void decb(Operand dst);

  // Lock prefix.
  void lock();

  void xchgb(Register reg, Operand op);
  void xchgw(Register reg, Operand op);

  void xaddb(Operand dst, Register src);
  void xaddw(Operand dst, Register src);
  void xaddl(Operand dst, Register src);
  void xaddq(Operand dst, Register src);

  void negb(Register reg);
  void negw(Register reg);
  void negl(Register reg);
  void negq(Register reg);
  void negb(Operand op);
  void negw(Operand op);
  void negl(Operand op);
  void negq(Operand op);

  void cmpxchgb(Operand dst, Register src);
  void cmpxchgw(Operand dst, Register src);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply eax by src, put the result in edx:eax.
  void mull(Register src);
  void mull(Operand src);
  // Multiply rax by src, put the result in rdx:rax.
  void mulq(Register src);

#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                     \
  void instruction##l(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
                                                                            \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
                                                                            \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }  \
                                                                            \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
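
  // For example, the V(shl, 0x4) entry declares shll/shlq and their _cl
  // variants: shll(rax, Immediate(3)) shifts the 32-bit rax left by 3, and
  // sarq_cl(rdx) arithmetic-shifts the 64-bit rdx right by the count in cl.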

  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  void store_rax(Address dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  void sub_sp_32(uint32_t imm);

  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(Operand op, Immediate mask);
  void testb(Operand op, Register reg);

  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(Operand op, Immediate mask);
  void testw(Operand op, Register reg);

  // Bit operations.
  void bswapl(Register dst);
  void bswapq(Register dst);
  void btq(Operand dst, Register src);
  void btsq(Operand dst, Register src);
  void btsq(Register dst, Immediate imm8);
  void btrq(Register dst, Immediate imm8);
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, Operand src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, Operand src);
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, Operand src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, Operand src);

  // Miscellaneous
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
  void ud2();
  void setcc(Condition cc, Register reg);

  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask);
  void palignr(XMMRegister dst, Operand src, uint8_t mask);
  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask);

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);

  // Explicitly emit a near call / near jump. The displacement is relative to
  // the next instruction (which starts at {pc_offset() + kNearJmpInstrSize}).
  static constexpr int kNearJmpInstrSize = 5;
  void near_call(intptr_t disp, RelocInfo::Mode rmode);
  void near_jmp(intptr_t disp, RelocInfo::Mode rmode);

  void call(Handle<CodeT> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Handle<CodeT> target, RelocInfo::Mode rmode);
  void jmp(Address entry, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(Operand src);

  // Unconditional jump relative to the current address. Low-level routine,
  // use with caution!
  void jmp_rel(int offset);

  // Conditional jumps
  void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<CodeT> target, RelocInfo::Mode rmode);

  // Floating-point operations
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(Operand adr);
  void fld_d(Operand adr);

  void fstp_s(Operand adr);
  void fstp_d(Operand adr);
  void fstp(int index);

  void fild_s(Operand adr);
  void fild_d(Operand adr);

  void fist_s(Operand adr);

  void fistp_s(Operand adr);
  void fistp_d(Operand adr);

  void fisttp_s(Operand adr);
  void fisttp_d(Operand adr);

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(Operand adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();

  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src);
  void movaps(XMMRegister dst, Operand src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);

  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);

  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, Operand src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  void movmskps(Register dst, XMMRegister src);

  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);

  template <typename Reg1, typename Reg2, typename Op>
  void vinstr(byte op, Reg1 dst, Reg2 src1, Op src2, SIMDPrefix pp,
              LeadingOpcode m, VexW w, CpuFeature feature = AVX2);

  // SSE instructions
  void sse_instr(XMMRegister dst, XMMRegister src, byte escape, byte opcode);
  void sse_instr(XMMRegister dst, Operand src, byte escape, byte opcode);
#define DECLARE_SSE_INSTRUCTION(instruction, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {       \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }                                                          \
  void instruction(XMMRegister dst, Operand src) {           \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
  SSE_BINOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
#undef DECLARE_SSE_INSTRUCTION
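
  // For a list entry of the form V(sqrtps, 0F, 51) (see sse-instr.h), the
  // macro above declares sqrtps(XMMRegister, XMMRegister) and
  // sqrtps(XMMRegister, Operand), both emitting escape byte 0F followed by
  // opcode 51.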

  // SSE instructions with prefix and SSE2 instructions
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  // These SSE instructions have the same encoding as the SSE2 instructions.
  SSE_INSTRUCTION_LIST_SS(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

  void sse2_instr(XMMRegister reg, byte imm8, byte prefix, byte escape,
                  byte opcode, int extension) {
    XMMRegister ext_reg = XMMRegister::from_code(extension);
    sse2_instr(ext_reg, reg, prefix, escape, opcode);
    emit(imm8);
  }

#define DECLARE_SSE2_SHIFT_IMM(instruction, prefix, escape, opcode, extension) \
  void instruction(XMMRegister reg, byte imm8) {                               \
    sse2_instr(reg, imm8, 0x##prefix, 0x##escape, 0x##opcode, 0x##extension);  \
  }
  SSE2_INSTRUCTION_LIST_SHIFT_IMM(DECLARE_SSE2_SHIFT_IMM)
#undef DECLARE_SSE2_SHIFT_IMM

#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

#define DECLARE_SSE2_PD_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX);     \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX);     \
  }

  SSE2_INSTRUCTION_LIST_PD(DECLARE_SSE2_PD_AVX_INSTRUCTION)
#undef DECLARE_SSE2_PD_AVX_INSTRUCTION

#define DECLARE_SSE2_PI_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }

  SSE2_INSTRUCTION_LIST_PI(DECLARE_SSE2_PI_AVX_INSTRUCTION)
#undef DECLARE_SSE2_PI_AVX_INSTRUCTION

#define DECLARE_SSE2_SHIFT_AVX_INSTRUCTION(instruction, prefix, escape,      \
                                           opcode)                           \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }

  SSE2_INSTRUCTION_LIST_SHIFT(DECLARE_SSE2_SHIFT_AVX_INSTRUCTION)
#undef DECLARE_SSE2_SHIFT_AVX_INSTRUCTION
#undef DECLARE_SSE2_AVX_INSTRUCTION

#define DECLARE_SSE2_UNOP_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }

  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSE2_UNOP_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src);
  void movshdup(XMMRegister dst, XMMRegister src);

  // SSSE3
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  void sse4_instr(Register dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(Operand dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, Register src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
#undef DECLARE_SSE4_INSTRUCTION

#define DECLARE_SSE4_EXTRACT_INSTRUCTION(instruction, prefix, escape1,     \
                                         escape2, opcode)                  \
  void instruction(Register dst, XMMRegister src, uint8_t imm8) {          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }                                                                        \
  void instruction(Operand dst, XMMRegister src, uint8_t imm8) {           \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_SSE4_EXTRACT_INSTRUCTION)
#undef DECLARE_SSE4_EXTRACT_INSTRUCTION

  // SSE4.2
  void sse4_2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                    byte escape2, byte opcode);
  void sse4_2_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                    byte escape2, byte opcode);
#define DECLARE_SSE4_2_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                   opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                        \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                           \
  void instruction(XMMRegister dst, Operand src) {                            \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE4_2_INSTRUCTION)
#undef DECLARE_SSE4_2_INSTRUCTION

#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0,  \
           AVX2);                                                             \
  }                                                                           \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0,  \
           AVX2);                                                             \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

#define DECLARE_SSSE3_UNOP_AVX_INSTRUCTION(instruction, prefix, escape1,     \
                                           escape2, opcode)                  \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src) {                    \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(YMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION

  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vpblendvb(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0, AVX2);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vblendvps(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0, AVX);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vblendvpd(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0, AVX);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

#define DECLARE_SSE4_PMOV_AVX_INSTRUCTION(instruction, prefix, escape1,      \
                                          escape2, opcode)                   \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_PMOV_AVX_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX_INSTRUCTION

#define DECLARE_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, opcode) \
  void v##instruction(Register dst, XMMRegister src, uint8_t imm8) {           \
    XMMRegister idst = XMMRegister::from_code(dst.code());                     \
    vinstr(0x##opcode, src, xmm0, idst, k##prefix, k##escape1##escape2, kW0);  \
    emit(imm8);                                                                \
  }                                                                            \
  void v##instruction(Operand dst, XMMRegister src, uint8_t imm8) {            \
    vinstr(0x##opcode, src, xmm0, dst, k##prefix, k##escape1##escape2, kW0);   \
    emit(imm8);                                                                \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_AVX_INSTRUCTION)
#undef DECLARE_AVX_INSTRUCTION

  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(XMMRegister dst, Operand src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, Operand src);

  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, Operand src);
  void movdqa(XMMRegister dst, XMMRegister src);

  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(XMMRegister dst, XMMRegister src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, Operand src);
  void movupd(Operand dst, XMMRegister src);

  void cvtdq2pd(XMMRegister dst, XMMRegister src);

  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, Operand src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, Operand src);
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttps2dq(XMMRegister dst, XMMRegister src);

  void cvtlsi2sd(XMMRegister dst, Operand src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, Operand src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, Operand src);
  void cvtqsi2sd(XMMRegister dst, Register src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  void haddps(XMMRegister dst, XMMRegister src);
  void haddps(XMMRegister dst, Operand src);

  void cmpeqsd(XMMRegister dst, XMMRegister src);
  void cmpeqss(XMMRegister dst, XMMRegister src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  void pinsrw(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrw(XMMRegister dst, Operand src, uint8_t imm8);

  // SSE4.1 instructions
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void insertps(XMMRegister dst, Operand src, byte imm8);
  void pextrq(Register dst, XMMRegister src, int8_t imm8);
  void pinsrb(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrb(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Operand src, uint8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundss(XMMRegister dst, Operand src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, Operand src, RoundingMode mode);
  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, Operand src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, Operand src, int8_t cmp);

#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }     \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0)
  SSE_CMP_P(cmplt, 0x1)
  SSE_CMP_P(cmple, 0x2)
  SSE_CMP_P(cmpunord, 0x3)
  SSE_CMP_P(cmpneq, 0x4)
  SSE_CMP_P(cmpnlt, 0x5)
  SSE_CMP_P(cmpnle, 0x6)

#undef SSE_CMP_P
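  // Expansion sketch (illustrative): SSE_CMP_P(cmpeq, 0x0) defines cmpeqps
  // and cmpeqpd overloads that forward to cmpps/cmppd with the CMPPS/CMPPD
  // predicate immediate 0x0 (EQ); cmplt uses 0x1 (LT), and so on down the
  // list.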

  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);

  void movhlps(XMMRegister dst, XMMRegister src) {
    sse_instr(dst, src, 0x0F, 0x12);
  }
  void movlhps(XMMRegister dst, XMMRegister src) {
    sse_instr(dst, src, 0x0F, 0x16);
  }

  // AVX instructions
  void vmovddup(XMMRegister dst, XMMRegister src);
  void vmovddup(XMMRegister dst, Operand src);
  void vmovddup(YMMRegister dst, YMMRegister src);
  void vmovddup(YMMRegister dst, Operand src);
  void vmovshdup(XMMRegister dst, XMMRegister src);
  void vmovshdup(YMMRegister dst, YMMRegister src);
  void vbroadcastss(XMMRegister dst, Operand src);
  void vbroadcastss(XMMRegister dst, XMMRegister src);
  void vbroadcastss(YMMRegister dst, Operand src);
  void vbroadcastss(YMMRegister dst, XMMRegister src);

  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);

#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix,        \
              k##escape1##escape2, k##extension);                       \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix,        \
              k##escape1##escape2, k##extension);                       \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA

  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, Operand src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, Operand src);
  void vmovq(Register dst, XMMRegister src);

  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
  void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
  void vmovdqa(XMMRegister dst, Operand src);
  void vmovdqa(XMMRegister dst, XMMRegister src);
  void vmovdqa(YMMRegister dst, Operand src);
  void vmovdqa(YMMRegister dst, YMMRegister src);
  void vmovdqu(XMMRegister dst, Operand src);
  void vmovdqu(Operand dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(YMMRegister dst, Operand src);
  void vmovdqu(Operand dst, YMMRegister src);
  void vmovdqu(YMMRegister dst, YMMRegister src);

  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);

  void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovhps(Operand dst, XMMRegister src);

#define AVX_SSE_UNOP(instr, escape, opcode)          \
  void v##instr(XMMRegister dst, XMMRegister src2) { \
    vps(0x##opcode, dst, xmm0, src2);                \
  }                                                  \
  void v##instr(XMMRegister dst, Operand src2) {     \
    vps(0x##opcode, dst, xmm0, src2);                \
  }                                                  \
  void v##instr(YMMRegister dst, YMMRegister src2) { \
    vps(0x##opcode, dst, ymm0, src2);                \
  }                                                  \
  void v##instr(YMMRegister dst, Operand src2) {     \
    vps(0x##opcode, dst, ymm0, src2);                \
  }
  SSE_UNOP_INSTRUCTION_LIST(AVX_SSE_UNOP)
#undef AVX_SSE_UNOP

#define AVX_SSE_BINOP(instr, escape, opcode)                           \
  void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vps(0x##opcode, dst, src1, src2);                                  \
  }
  SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)
#undef AVX_SSE_BINOP

#define AVX_3(instr, opcode, impl, SIMDRegister)                       \
  void instr(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(SIMDRegister dst, SIMDRegister src1, Operand src2) {      \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_3(vhaddps, 0x7c, vsd, XMMRegister)
  AVX_3(vhaddps, 0x7c, vsd, YMMRegister)

#define AVX_SCALAR(instr, prefix, escape, opcode)                      \
  void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG);   \
  }                                                                    \
  void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG);   \
  }
  SSE_INSTRUCTION_LIST_SS(AVX_SCALAR)
  SSE2_INSTRUCTION_LIST_SD(AVX_SCALAR)
#undef AVX_SCALAR

#undef AVX_3

#define AVX_SSE2_SHIFT_IMM(instr, prefix, escape, opcode, extension)   \
  void v##instr(XMMRegister dst, XMMRegister src, byte imm8) {         \
    XMMRegister ext_reg = XMMRegister::from_code(extension);           \
    vinstr(0x##opcode, ext_reg, dst, src, k##prefix, k##escape, kWIG); \
    emit(imm8);                                                        \
  }
  SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
#undef AVX_SSE2_SHIFT_IMM
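  // Note (added for clarity): in the VEX encoding of these shift-by-immediate
  // forms, the ModRM.reg field carries an opcode extension rather than a
  // register, which is why the macro materializes the extension as ext_reg;
  // the destination register travels in VEX.vvvv (the src1 slot of vinstr).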

  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x16, dst, src1, src2, kNoPrefix, k0F, kWIG);
  }
  void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x12, dst, src1, src2, kNoPrefix, k0F, kWIG);
  }
  void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
    vinstr(0xe6, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0x5b, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundss(XMMRegister dst, XMMRegister src1, Operand src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, Operand src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundps(YMMRegister dst, YMMRegister src, RoundingMode mode) {
    vinstr(0x08, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x09, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundpd(YMMRegister dst, YMMRegister src, RoundingMode mode) {
    vinstr(0x09, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
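  // Note on the immediate (added for clarity): bit 3 (0x8) selects "suppress
  // precision exceptions" in the ROUND* immediate, so the byte emitted is the
  // rounding mode with that bit set; e.g. a round-down mode value of 0x1
  // would be emitted as 0x9.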

  template <typename Reg, typename Op>
  void vsd(byte op, Reg dst, Reg src1, Op src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG, AVX);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, Operand src) { vss(0x10, dst, xmm0, src); }
  void vmovss(Operand dst, XMMRegister src) { vss(0x11, src, xmm0, dst); }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, Operand src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vps(0xC6, dst, src1, src2, imm8);
  }
  void vshufps(YMMRegister dst, YMMRegister src1, YMMRegister src2, byte imm8) {
    vps(0xC6, dst, src1, src2, imm8);
  }

  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovaps(YMMRegister dst, YMMRegister src) { vps(0x28, dst, ymm0, src); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovaps(YMMRegister dst, Operand src) { vps(0x28, dst, ymm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(YMMRegister dst, YMMRegister src) { vps(0x10, dst, ymm0, src); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovups(YMMRegister dst, Operand src) { vps(0x10, dst, ymm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovups(Operand dst, YMMRegister src) { vps(0x11, src, ymm0, dst); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovapd(YMMRegister dst, YMMRegister src) { vpd(0x28, dst, ymm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vmovupd(YMMRegister dst, Operand src) { vpd(0x10, dst, ymm0, src); }
  void vmovupd(Operand dst, XMMRegister src) { vpd(0x11, src, xmm0, dst); }
  void vmovupd(Operand dst, YMMRegister src) { vpd(0x11, src, ymm0, dst); }
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vpd(0x50, idst, xmm0, src);
  }
  void vpmovmskb(Register dst, XMMRegister src);
  void vcmpeqss(XMMRegister dst, XMMRegister src) {
    vss(0xC2, dst, dst, src);
    emit(0x00);  // EQ == 0
  }
  void vcmpeqsd(XMMRegister dst, XMMRegister src) {
    vsd(0xC2, dst, dst, src);
    emit(0x00);  // EQ == 0
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
#define AVX_CMP_P(instr, imm8, SIMDRegister)                               \
  void instr##ps(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(SIMDRegister dst, SIMDRegister src1, Operand src2) {      \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(SIMDRegister dst, SIMDRegister src1, Operand src2) {      \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  AVX_CMP_P(vcmpeq, 0x0, XMMRegister)
  AVX_CMP_P(vcmpeq, 0x0, YMMRegister)
  AVX_CMP_P(vcmplt, 0x1, XMMRegister)
  AVX_CMP_P(vcmplt, 0x1, YMMRegister)
  AVX_CMP_P(vcmple, 0x2, XMMRegister)
  AVX_CMP_P(vcmple, 0x2, YMMRegister)
  AVX_CMP_P(vcmpunord, 0x3, XMMRegister)
  AVX_CMP_P(vcmpunord, 0x3, YMMRegister)
  AVX_CMP_P(vcmpneq, 0x4, XMMRegister)
  AVX_CMP_P(vcmpneq, 0x4, YMMRegister)
  AVX_CMP_P(vcmpnlt, 0x5, XMMRegister)
  AVX_CMP_P(vcmpnlt, 0x5, YMMRegister)
  AVX_CMP_P(vcmpnle, 0x6, XMMRegister)
  AVX_CMP_P(vcmpnle, 0x6, YMMRegister)
  AVX_CMP_P(vcmpge, 0xd, XMMRegister)
  AVX_CMP_P(vcmpge, 0xd, YMMRegister)

#undef AVX_CMP_P
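  // Expansion sketch (illustrative): AVX_CMP_P(vcmpeq, 0x0, YMMRegister)
  // defines vcmpeqps/vcmpeqpd overloads on YMM registers that forward to
  // vcmpps/vcmppd with predicate 0x0 (EQ); 0xd is the AVX extended
  // "GE (ordered, signaling)" predicate used by vcmpge.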

  void vlddqu(XMMRegister dst, Operand src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpextrq(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW1);
    emit(imm8);
  }

  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }

  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }

  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
           byte imm8);
  void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2,
           byte imm8);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, YMMRegister dst, YMMRegister src1, Operand src2);

  // AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)           \
  template <typename Reg, typename Op>                                      \
  void instr(Reg dst, Op src) {                                             \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION

  // BMI instructions
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, Operand src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Operand src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, Operand src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Operand src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) { bmi1q(0xf3, rbx, dst, src); }
  void blsiq(Register dst, Operand src) { bmi1q(0xf3, rbx, dst, src); }
  void blsil(Register dst, Register src) { bmi1l(0xf3, rbx, dst, src); }
  void blsil(Register dst, Operand src) { bmi1l(0xf3, rbx, dst, src); }
  void blsmskq(Register dst, Register src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskq(Register dst, Operand src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Register src) { bmi1l(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Operand src) { bmi1l(0xf3, rdx, dst, src); }
  void blsrq(Register dst, Register src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrq(Register dst, Operand src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Register src) { bmi1l(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Operand src) { bmi1l(0xf3, rcx, dst, src); }
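  // Note (added for clarity): blsi/blsmsk/blsr share opcode 0xf3 and are
  // distinguished by an opcode extension in the ModRM.reg field; the fixed
  // rbx/rdx/rcx arguments above supply register codes 3/2/1 to encode that
  // extension, not actual operands.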
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, Operand src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, Operand src);

  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, Operand src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, Operand src);

  void popcntq(Register dst, Register src);
  void popcntq(Register dst, Operand src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, Operand src);

  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, Operand src1, Register src2) {
    bmi2q(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Operand src1, Register src2) {
    bmi2l(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, Operand src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Operand src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, Operand src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Operand src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Operand src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Operand src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Operand src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Operand src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, Operand src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, Operand src, byte imm8);

  void mfence();
  void lfence();
  void pause();

  // Returns the number of bytes of code generated from the label's
  // position to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }
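  // Usage sketch (illustrative, assuming a label bound in this assembler):
  //   Label start;
  //   bind(&start);
  //   ... emit instructions ...
  //   int bytes = SizeOfCodeGeneratedSince(&start);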

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  // Writes a single byte, word, or quadword of data into the code stream.
  // Used for inline tables, e.g., jump tables.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    dq(data, rmode);
  }
  void dq(Label* label);

  // Patch entries for the partial constant pool.
  void PatchConstPool();

  // Check whether to use the partial constant pool for this rmode.
  static bool UseConstPoolFor(RelocInfo::Mode rmode);

  // Check whether there are fewer than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }
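  // Layout note (added for clarity): machine code grows forward from
  // buffer_start_ via pc_, while relocation info is written backwards from
  // the end of the buffer, so the free space is the span between pc_ and
  // reloc_info_writer.pos().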

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }

#if defined(V8_OS_WIN_X64)
  win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
#endif

 protected:
  // Call near indirect
  void call(Operand operand);

 private:
  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }

  // code emission
  void GrowBuffer();

  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  inline void emit(Immediate x);
  inline void emit(Immediate64 x);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);
  inline void emit_rex_64(XMMRegister reg, XMMRegister rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
  inline void emit_rex_64(Register reg, Operand op);
  inline void emit_rex_64(XMMRegister reg, Operand op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(Operand op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }
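  // Encoding note (added for clarity): a REX prefix is 0100WRXB in binary,
  // i.e. 0x40 | (W << 3) | (R << 2) | (X << 1) | B, so the bare REX.W
  // prefix emitted above is 0x48.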

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, Operand op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(Operand op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, Operand op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Operand), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Operand op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(Operand) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(Operand op);

  // Calls emit_rex_32(Register) for all non-byte registers.
  inline void emit_optional_rex_8(Register reg);

  // Calls emit_rex_32(Register, Operand) for all non-byte registers, and
  // emit_optional_rex_32(Register, Operand) for byte registers.
  inline void emit_optional_rex_8(Register reg, Operand op);

  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK_EQ(size, kInt32Size);
    }
  }

  template <class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template <class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }

  // Emit vex prefix
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, Operand rm, LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
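  // Encoding note (added for clarity): the two-byte VEX form (0xc5) can only
  // express the 0F leading-opcode escape with W = 0 and no REX.X/REX.B bits,
  // so the emitter must fall back to the three-byte form (0xc4) whenever a
  // different escape, W = 1, or an extended index/base register is needed.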

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, Operand adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand.  Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, Operand adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }
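  // Worked example (added for clarity): emit_modrm(rax, rcx) produces
  // 0xC0 | (0 << 3) | 1 = 0xC1, i.e. mod = 11 (register-direct) with rax in
  // the reg field and rcx in the rm field.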

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(Register reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);

  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, Operand rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Operand rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode, Register reg, Operand rm_reg, int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_8(byte subcode, Operand dst, Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_16(byte subcode, Operand dst, Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode, Operand dst, Immediate src,
                               int size);

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl, masked to the operand width (modulo 64 for 64-bit
  // operands, modulo 32 for 32-bit operands).
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetic operations
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, Operand src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Operand dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }
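  // Note (added for clarity): the two opcodes are the two directions of ADD;
  // 0x03 is ADD r, r/m (register destination) and 0x01 is ADD r/m, r (memory
  // destination), which is why the Operand-destination overload swaps the
  // argument order passed to arithmetic_op.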

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, Operand src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Operand dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, Operand src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Operand dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with dst.  If equal, set ZF and write src into
  // dst.  Otherwise clear ZF and load dst into {al,ax,eax,rax}.  This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(Operand dst, Register src, int size);

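  // A hedged sketch of the compare-and-swap retry loop this enables,
  // assuming the public lock()/cmpxchgq() wrappers that forward here (the
  // exact calls are illustrative, following this assembler's naming style):
  //
  //   bind(&retry);
  //   // rax holds the expected value, rcx the desired value.
  //   lock();
  //   cmpxchgq(Operand(rbx, 0), rcx);  // if [rbx] == rax: [rbx] = rcx, ZF = 1
  //   j(not_equal, &retry);            // else rax = [rbx], ZF = 0
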
  void emit_dec(Register dst, int size);
  void emit_dec(Operand dst, int size);

  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);
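  //
  // Callers must first sign-extend (for idiv) or zero (for div) the high
  // half of the dividend, or the division can fault on overflow. A hedged
  // sketch of the conventional x64 sequence (the wrapper names are assumed
  // from this assembler's style):
  //
  //   movq(rax, dividend);
  //   cqo();                 // rdx:rax = sign-extended rax
  //   idivq(divisor);        // quotient in rax, remainder in rdx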

  // Signed multiply instructions.
  // The single-operand forms compute rdx:rax = rax * src when size is 64, or
  // edx:eax = eax * src when size is 32; the two-operand and immediate forms
  // truncate the product into dst.
  void emit_imul(Register src, int size);
  void emit_imul(Operand src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, Operand src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, Operand src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(Operand dst, int size);

  void emit_lea(Register dst, Operand src, int size);

  void emit_mov(Register dst, Operand src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(Operand dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(Operand dst, Immediate value, int size);
  void emit_mov(Register dst, Immediate64 value, int size);
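  //
  // The Immediate64 overload is the only form that can materialize a full
  // 64-bit constant (the movabs-style REX.W B8+r encoding); the Immediate
  // overloads carry at most 32 bits, sign-extended when size is 64. A hedged
  // sketch (illustrative call, not a normative API):
  //
  //   emit_mov(rax, Immediate64(0x1122334455667788), kInt64Size);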

  void emit_movzxb(Register dst, Operand src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, Operand src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(Operand dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(Operand dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, Operand src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Operand dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, Operand src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Operand dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(Operand op, Register reg, int size);
  void emit_test(Operand op, Immediate mask, int size);
  // TEST is symmetric in its operands, so the (reg, op) form simply
  // forwards to the (op, reg) form.
  void emit_test(Register reg, Operand op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, Operand src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32-bit operations zero the top 32 bits of 64-bit registers, so the
      // common xor-with-self zeroing idiom never needs the longer 64-bit
      // encoding (which would cost a REX.W prefix).
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }

  void emit_xor(Register dst, Operand src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions share a common VEX-encoded shape; these helpers
  // emit that shape, parameterized by opcode, SIMD prefix, and operand size.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, Operand rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, Operand rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
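  //
  // A hedged example of how a public BMI1 wrapper might be expressed in
  // terms of these helpers (0xf2 is the standard ANDN opcode in the 0F 38
  // map; treat the wrapper name as illustrative):
  //
  //   void andnq(Register dst, Register src1, Register src2) {
  //     bmi1q(0xf2, dst, src1, src2);
  //   }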

  // Record the position of a jmp/jcc instruction.
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // Code generation.
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Variables for this instance of the assembler.
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;

  ConstPool constpool_;

  friend class ConstPool;

#if defined(V8_OS_WIN_X64)
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif
};

extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                       YMMRegister src2, SIMDPrefix pp,
                       LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
                       XMMRegister src2, SIMDPrefix pp,
                       LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                       Operand src2, SIMDPrefix pp, LeadingOpcode m,
                       VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                       XMMRegister src2, SIMDPrefix pp,
                       LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
                       Operand src2, SIMDPrefix pp, LeadingOpcode m,
                       VexW w, CpuFeature feature);

// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes sure that
// there is enough space (growing the buffer if necessary), and in debug mode
// the destructor checks that we did not generate too much.
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* const assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
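
// A hedged sketch of the intended usage pattern (the emit() helper and the
// nop() wrapper are illustrative): every code-emitting member opens with an
// EnsureSpace guard, so GrowBuffer() never has to run mid-instruction and at
// most kGap bytes may follow the guard.
//
//   void Assembler::nop() {
//     EnsureSpace ensure_space(this);
//     emit(0x90);
//   }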

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_X64_ASSEMBLER_X64_H_