• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 //
15 // - Neither the name of Sun Microsystems or the names of contributors may
16 // be used to endorse or promote products derived from this software without
17 // specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // The original source code covered by the above license above has been
32 // modified significantly by Google Inc.
33 // Copyright 2011 the V8 project authors. All rights reserved.
34 
35 // A light-weight IA32 Assembler.
36 
37 #ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
38 #define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
39 
40 #include <deque>
41 #include <memory>
42 
43 #include "src/codegen/assembler.h"
44 #include "src/codegen/ia32/constants-ia32.h"
45 #include "src/codegen/ia32/fma-instr.h"
46 #include "src/codegen/ia32/register-ia32.h"
47 #include "src/codegen/ia32/sse-instr.h"
48 #include "src/codegen/label.h"
49 #include "src/execution/isolate.h"
50 #include "src/objects/smi.h"
51 #include "src/utils/utils.h"
52 
53 namespace v8 {
54 namespace internal {
55 
56 class SafepointTableBuilder;
57 
// Condition codes for conditional instructions. The non-negative values
// are OR'ed directly into opcode bytes (see kJccShortPrefix below), so
// they must match the ia32 condition-code encodings.
enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,        // unsigned <
  above_equal = 3,  // unsigned >=
  equal = 4,
  not_equal = 5,
  below_equal = 6,  // unsigned <=
  above = 7,        // unsigned >
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,           // signed <
  greater_equal = 13,  // signed >=
  less_equal = 14,     // signed <=
  greater = 15,        // signed >

  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};
87 
88 // Returns the equivalent of !cc.
89 // Negation of the default no_condition (-1) results in a non-default
90 // no_condition value (-2). As long as tests for no_condition check
91 // for condition < 0, this will work as expected.
NegateCondition(Condition cc)92 inline Condition NegateCondition(Condition cc) {
93   return static_cast<Condition>(cc ^ 1);
94 }
95 
// Floating-point rounding modes. The values match the 2-bit
// rounding-control immediate field used by the ia32/SSE rounding
// instructions (NOTE(review): encoding assumed from the values;
// confirm against the instruction emitters that consume this enum).
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
102 
103 // -----------------------------------------------------------------------------
104 // Machine instruction Immediates
105 
// An Immediate wraps a 32-bit instruction operand together with the
// relocation mode that tells the assembler how the value must be
// recorded (plain number, external reference, embedded heap object, ...).
class Immediate {
 public:
  // Calls where x is an Address (uintptr_t) resolve to this overload.
  inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    value_.immediate = x;
    rmode_ = rmode;
  }
  // The referenced address is stored and tagged as an external reference
  // so it can be found and relocated later.
  inline explicit Immediate(const ExternalReference& ext)
      : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
  // The handle's address is stored and tagged as a fully embedded object.
  inline explicit Immediate(Handle<HeapObject> handle)
      : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {}
  // A Smi is stored as its raw value; no relocation is required.
  inline explicit Immediate(Smi value)
      : Immediate(static_cast<intptr_t>(value.ptr())) {}

  static Immediate EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Immediate EmbeddedStringConstant(const StringConstantBase* str);

  static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); }

  // True if this immediate stands for a heap object that has not been
  // created yet (a pending HeapObjectRequest) rather than a known value.
  bool is_heap_object_request() const {
    DCHECK_IMPLIES(is_heap_object_request_,
                   rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
                       rmode_ == RelocInfo::CODE_TARGET);
    return is_heap_object_request_;
  }

  HeapObjectRequest heap_object_request() const {
    DCHECK(is_heap_object_request());
    return value_.heap_object_request;
  }

  // The raw 32-bit payload; only valid when this is not a pending
  // heap object request.
  int immediate() const {
    DCHECK(!is_heap_object_request());
    return value_.immediate;
  }

  bool is_embedded_object() const {
    return !is_heap_object_request() &&
           rmode() == RelocInfo::FULL_EMBEDDED_OBJECT;
  }

  Handle<HeapObject> embedded_object() const {
    return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
  }

  bool is_external_reference() const {
    return rmode() == RelocInfo::EXTERNAL_REFERENCE;
  }

  ExternalReference external_reference() const {
    DCHECK(is_external_reference());
    return bit_cast<ExternalReference>(immediate());
  }

  // The range predicates below classify only plain (non-relocated)
  // immediates: any relocation mode disqualifies the value, since a
  // relocated operand always needs the full 32-bit encoding.
  bool is_zero() const {
    return RelocInfo::IsNoInfo(rmode_) && immediate() == 0;
  }
  bool is_int8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int8(immediate());
  }
  bool is_uint8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint8(immediate());
  }
  bool is_int16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int16(immediate());
  }

  bool is_uint16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint16(immediate());
  }

  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  // Used by CodeRelativeOffset: the label pointer is stored and tagged
  // as an internal reference to be resolved when the label is bound.
  inline explicit Immediate(Label* value) {
    value_.immediate = reinterpret_cast<int32_t>(value);
    rmode_ = RelocInfo::INTERNAL_REFERENCE;
  }

  // Either the 32-bit immediate itself or a pending heap object request.
  union Value {
    Value() {}
    HeapObjectRequest heap_object_request;
    int immediate;
  } value_;
  bool is_heap_object_request_ = false;
  RelocInfo::Mode rmode_;

  friend class Operand;
  friend class Assembler;
  friend class MacroAssembler;
};
197 
198 // -----------------------------------------------------------------------------
199 // Machine instruction Operands
200 
// Scale factors for the index register of a memory operand: the index
// is multiplied by 1 << value, matching the 2-bit scale field of the
// SIB byte.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  // On ia32, system pointers and tagged values are both 4 bytes.
  times_half_system_pointer_size = times_2,
  times_system_pointer_size = times_4,

  times_tagged_size = times_4,
};
213 
// An Operand encodes an ia32 instruction operand — a register or a
// memory reference — as the raw ModRM/SIB/displacement bytes that will
// be emitted, plus the displacement's relocation mode.
class V8_EXPORT_PRIVATE Operand {
 public:
  // reg
  V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }

  // XMM reg
  V8_INLINE explicit Operand(XMMRegister xmm_reg) {
    Register reg = Register::from_code(xmm_reg.code());
    set_modrm(3, reg);
  }

  // [disp/r]
  V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
    // mod == 0 with rm == ebp encodes a bare 32-bit displacement.
    set_modrm(0, ebp);
    set_dispr(disp, rmode);
  }

  // [disp/r]
  V8_INLINE explicit Operand(Immediate imm) {
    set_modrm(0, ebp);
    set_dispr(imm.immediate(), imm.rmode_);
  }

  // [base + disp/r]
  explicit Operand(Register base, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [rip + disp/r]
  // The label pointer is stored as the displacement and resolved as an
  // internal reference when the label is bound.
  explicit Operand(Label* label) {
    set_modrm(0, ebp);
    set_dispr(reinterpret_cast<intptr_t>(label), RelocInfo::INTERNAL_REFERENCE);
  }

  // [base + index*scale + disp/r]
  explicit Operand(Register base, Register index, ScaleFactor scale,
                   int32_t disp, RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [index*scale + disp/r]
  explicit Operand(Register index, ScaleFactor scale, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // Operand addressing an entry of a jump table: the table label's
  // address (an internal reference) indexed by index*scale.
  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
    return Operand(index, scale, reinterpret_cast<int32_t>(table),
                   RelocInfo::INTERNAL_REFERENCE);
  }

  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
    return Operand(base, imm.value_.immediate, imm.rmode_);
  }

  // Returns true if this Operand is a wrapper for the specified register.
  bool is_reg(Register reg) const { return is_reg(reg.code()); }
  bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }

  // Returns true if this Operand is a wrapper for one register.
  bool is_reg_only() const;

  // Asserts that this Operand is a wrapper for one register and returns the
  // register.
  Register reg() const;

  // The encoded ModRM/SIB/displacement bytes, ready to be emitted.
  base::Vector<const byte> encoded_bytes() const { return {buf_, len_}; }
  RelocInfo::Mode rmode() { return rmode_; }

 private:
  // Set the ModRM byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  inline void set_modrm(int mod, Register rm) {
    DCHECK_EQ(mod & -4, 0);  // mod must fit in two bits.
    buf_[0] = mod << 6 | rm.code();
    len_ = 1;
  }

  inline void set_sib(ScaleFactor scale, Register index, Register base);
  inline void set_disp8(int8_t disp);
  // Appends a 32-bit displacement after the ModRM (and optional SIB) byte.
  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
    DCHECK(len_ == 1 || len_ == 2);
    Address p = reinterpret_cast<Address>(&buf_[len_]);
    WriteUnalignedValue(p, disp);
    len_ += sizeof(int32_t);
    rmode_ = rmode;
  }

  inline bool is_reg(int reg_code) const {
    return ((buf_[0] & 0xF8) == 0xC0)  // addressing mode is register only.
           && ((buf_[0] & 0x07) == reg_code);  // register codes match.
  }

  // ModRM byte, optional SIB byte, optional 8- or 32-bit displacement.
  byte buf_[6];
  // The number of bytes in buf_.
  uint8_t len_ = 0;
  // Only valid if len_ > 4.
  RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;
};
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");
311 
312 bool operator!=(Operand op, XMMRegister r);
313 
314 // -----------------------------------------------------------------------------
315 // A Displacement describes the 32bit immediate field of an instruction which
316 // may be used together with a Label in order to refer to a yet unknown code
317 // position. Displacements stored in the instruction stream are used to describe
318 // the instruction and to chain a list of instructions using the same Label.
319 // A Displacement contains 2 different fields:
320 //
321 // next field: position of next displacement in the chain (0 = end of list)
322 // type field: instruction type
323 //
324 // A next value of null (0) indicates the end of a chain (note that there can
325 // be no displacement at position zero, because there is always at least one
326 // instruction byte before the displacement).
327 //
328 // Displacement _data field layout
329 //
330 // |31.....2|1......0|
331 // [  next  |  type  |
332 
333 class Displacement {
334  public:
335   enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };
336 
data()337   int data() const { return data_; }
type()338   Type type() const { return TypeField::decode(data_); }
next(Label * L)339   void next(Label* L) const {
340     int n = NextField::decode(data_);
341     n > 0 ? L->link_to(n) : L->Unuse();
342   }
link_to(Label * L)343   void link_to(Label* L) { init(L, type()); }
344 
Displacement(int data)345   explicit Displacement(int data) { data_ = data; }
346 
Displacement(Label * L,Type type)347   Displacement(Label* L, Type type) { init(L, type); }
348 
print()349   void print() {
350     PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
351            NextField::decode(data_));
352   }
353 
354  private:
355   int data_;
356 
357   using TypeField = base::BitField<Type, 0, 2>;
358   using NextField = base::BitField<int, 2, 32 - 2>;
359 
360   void init(Label* L, Type type);
361 };
362 
363 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
364  private:
365   // We check before assembling an instruction that there is sufficient
366   // space to write an instruction and its relocation information.
367   // The relocation writer's position must be kGap bytes above the end of
368   // the generated instructions. This leaves enough space for the
369   // longest possible ia32 instruction, 15 bytes, and the longest possible
370   // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
371   // (There is a 15 byte limit on ia32 instruction length that rules out some
372   // otherwise valid instructions.)
373   // This allows for a single, fast space check per instruction.
374   static constexpr int kGap = 32;
375   STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);
376 
377  public:
378   // Create an assembler. Instructions and relocation information are emitted
379   // into a buffer, with the instructions starting from the beginning and the
380   // relocation information starting from the end of the buffer. See CodeDesc
381   // for a detailed comment on the layout (globals.h).
382   //
383   // If the provided buffer is nullptr, the assembler allocates and grows its
384   // own buffer. Otherwise it takes ownership of the provided buffer.
385   explicit Assembler(const AssemblerOptions&,
386                      std::unique_ptr<AssemblerBuffer> = {});
387 
388   // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
389   static constexpr int kNoHandlerTable = 0;
390   static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
391   void GetCode(Isolate* isolate, CodeDesc* desc,
392                SafepointTableBuilder* safepoint_table_builder,
393                int handler_table_offset);
394 
395   // Convenience wrapper for code without safepoint or handler tables.
GetCode(Isolate * isolate,CodeDesc * desc)396   void GetCode(Isolate* isolate, CodeDesc* desc) {
397     GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
398   }
399 
400   void FinalizeJumpOptimizationInfo();
401 
402   // Unused on this architecture.
MaybeEmitOutOfLineConstantPool()403   void MaybeEmitOutOfLineConstantPool() {}
404 
405   // Read/Modify the code target in the branch/call instruction at pc.
406   // The isolate argument is unused (and may be nullptr) when skipping flushing.
407   inline static Address target_address_at(Address pc, Address constant_pool);
408   inline static void set_target_address_at(
409       Address pc, Address constant_pool, Address target,
410       ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
411 
412   // This sets the branch destination (which is in the instruction on x86).
413   // This is for calls and branches within generated code.
414   inline static void deserialization_set_special_target_at(
415       Address instruction_payload, Code code, Address target);
416 
417   // Get the size of the special target encoded at 'instruction_payload'.
418   inline static int deserialization_special_target_size(
419       Address instruction_payload);
420 
421   // This sets the internal reference at the pc.
422   inline static void deserialization_set_target_internal_reference_at(
423       Address pc, Address target,
424       RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
425 
426   static constexpr int kSpecialTargetSize = kSystemPointerSize;
427 
428   // One byte opcode for test al, 0xXX.
429   static constexpr byte kTestAlByte = 0xA8;
430   // One byte opcode for nop.
431   static constexpr byte kNopByte = 0x90;
432 
433   // One byte opcode for a short unconditional jump.
434   static constexpr byte kJmpShortOpcode = 0xEB;
435   // One byte prefix for a short conditional jump.
436   static constexpr byte kJccShortPrefix = 0x70;
437   static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
438   static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
439   static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
440   static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
441 
442   // ---------------------------------------------------------------------------
443   // Code generation
444   //
445   // - function names correspond one-to-one to ia32 instruction mnemonics
446   // - unless specified otherwise, instructions operate on 32bit operands
447   // - instructions on 8bit (byte) operands/registers have a trailing '_b'
448   // - instructions on 16bit (word) operands/registers have a trailing '_w'
449   // - naming conflicts with C++ keywords are resolved via a trailing '_'
450 
451   // NOTE ON INTERFACE: Currently, the interface is not very consistent
452   // in the sense that some operations (e.g. mov()) can be called in more
453   // the one way to generate the same instruction: The Register argument
454   // can in some cases be replaced with an Operand(Register) argument.
455   // This should be cleaned up and made more orthogonal. The questions
456   // is: should we always use Operands instead of Registers where an
457   // Operand is possible, or should we have a Register (overloaded) form
458   // instead? We must be careful to make sure that the selected instruction
459   // is obvious from the parameters to avoid hard-to-find code generation
460   // bugs.
461 
462   // Insert the smallest number of nop instructions
463   // possible to align the pc offset to a multiple
464   // of m. m must be a power of 2.
465   void Align(int m);
466   // Insert the smallest number of zero bytes possible to align the pc offset
467   // to a mulitple of m. m must be a power of 2 (>= 2).
468   void DataAlign(int m);
469   void Nop(int bytes = 1);
470   // Aligns code to something that's optimal for a jump target for the platform.
471   void CodeTargetAlign();
LoopHeaderAlign()472   void LoopHeaderAlign() { CodeTargetAlign(); }
473 
474   // Stack
475   void pushad();
476   void popad();
477 
478   void pushfd();
479   void popfd();
480 
481   void push(const Immediate& x);
482   void push_imm32(int32_t imm32);
483   void push(Register src);
484   void push(Operand src);
485 
486   void pop(Register dst);
487   void pop(Operand dst);
488 
489   void leave();
490 
491   // Moves
mov_b(Register dst,Register src)492   void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
493   void mov_b(Register dst, Operand src);
mov_b(Register dst,int8_t imm8)494   void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
mov_b(Operand dst,int8_t src)495   void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); }
496   void mov_b(Operand dst, const Immediate& src);
497   void mov_b(Operand dst, Register src);
498 
499   void mov_w(Register dst, Operand src);
mov_w(Operand dst,int16_t src)500   void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); }
501   void mov_w(Operand dst, const Immediate& src);
502   void mov_w(Operand dst, Register src);
503 
504   void mov(Register dst, int32_t imm32);
505   void mov(Register dst, const Immediate& x);
506   void mov(Register dst, Handle<HeapObject> handle);
507   void mov(Register dst, Operand src);
508   void mov(Register dst, Register src);
509   void mov(Operand dst, const Immediate& x);
510   void mov(Operand dst, Handle<HeapObject> handle);
511   void mov(Operand dst, Register src);
512   void mov(Operand dst, Address src, RelocInfo::Mode);
513 
movsx_b(Register dst,Register src)514   void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
515   void movsx_b(Register dst, Operand src);
516 
movsx_w(Register dst,Register src)517   void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
518   void movsx_w(Register dst, Operand src);
519 
movzx_b(Register dst,Register src)520   void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
521   void movzx_b(Register dst, Operand src);
522 
movzx_w(Register dst,Register src)523   void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
524   void movzx_w(Register dst, Operand src);
525 
526   void movq(XMMRegister dst, Operand src);
527   void movq(Operand dst, XMMRegister src);
528 
529   // Conditional moves
cmov(Condition cc,Register dst,Register src)530   void cmov(Condition cc, Register dst, Register src) {
531     cmov(cc, dst, Operand(src));
532   }
533   void cmov(Condition cc, Register dst, Operand src);
534 
535   // Flag management.
536   void cld();
537 
538   // Repetitive string instructions.
539   void rep_movs();
540   void rep_stos();
541   void stos();
542 
543   void xadd(Operand dst, Register src);
544   void xadd_b(Operand dst, Register src);
545   void xadd_w(Operand dst, Register src);
546 
547   // Exchange
548   void xchg(Register dst, Register src);
549   void xchg(Register dst, Operand src);
550   void xchg_b(Register reg, Operand op);
551   void xchg_w(Register reg, Operand op);
552 
553   // Lock prefix
554   void lock();
555 
556   // CompareExchange
557   void cmpxchg(Operand dst, Register src);
558   void cmpxchg_b(Operand dst, Register src);
559   void cmpxchg_w(Operand dst, Register src);
560   void cmpxchg8b(Operand dst);
561 
562   // Memory Fence
563   void mfence();
564   void lfence();
565 
566   void pause();
567 
568   // Arithmetics
569   void adc(Register dst, int32_t imm32);
adc(Register dst,Register src)570   void adc(Register dst, Register src) { adc(dst, Operand(src)); }
571   void adc(Register dst, Operand src);
572 
add(Register dst,Register src)573   void add(Register dst, Register src) { add(dst, Operand(src)); }
574   void add(Register dst, Operand src);
575   void add(Operand dst, Register src);
add(Register dst,const Immediate & imm)576   void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
577   void add(Operand dst, const Immediate& x);
578 
579   void and_(Register dst, int32_t imm32);
580   void and_(Register dst, const Immediate& x);
and_(Register dst,Register src)581   void and_(Register dst, Register src) { and_(dst, Operand(src)); }
582   void and_(Register dst, Operand src);
583   void and_(Operand dst, Register src);
584   void and_(Operand dst, const Immediate& x);
585 
cmpb(Register reg,Immediate imm8)586   void cmpb(Register reg, Immediate imm8) {
587     DCHECK(reg.is_byte_register());
588     cmpb(Operand(reg), imm8);
589   }
590   void cmpb(Operand op, Immediate imm8);
591   void cmpb(Register reg, Operand op);
592   void cmpb(Operand op, Register reg);
cmpb(Register dst,Register src)593   void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
594   void cmpb_al(Operand op);
595   void cmpw_ax(Operand op);
596   void cmpw(Operand dst, Immediate src);
cmpw(Register dst,Immediate src)597   void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
598   void cmpw(Register dst, Operand src);
cmpw(Register dst,Register src)599   void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
600   void cmpw(Operand dst, Register src);
601   void cmp(Register reg, int32_t imm32);
602   void cmp(Register reg, Handle<HeapObject> handle);
cmp(Register reg0,Register reg1)603   void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
604   void cmp(Register reg, Operand op);
cmp(Register reg,const Immediate & imm)605   void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
606   void cmp(Operand op, Register reg);
607   void cmp(Operand op, const Immediate& imm);
608   void cmp(Operand op, Handle<HeapObject> handle);
609 
610   void dec_b(Register dst);
611   void dec_b(Operand dst);
612 
613   void dec(Register dst);
614   void dec(Operand dst);
615 
616   void cdq();
617 
idiv(Register src)618   void idiv(Register src) { idiv(Operand(src)); }
619   void idiv(Operand src);
div(Register src)620   void div(Register src) { div(Operand(src)); }
621   void div(Operand src);
622 
623   // Signed multiply instructions.
624   void imul(Register src);  // edx:eax = eax * src.
imul(Register dst,Register src)625   void imul(Register dst, Register src) { imul(dst, Operand(src)); }
626   void imul(Register dst, Operand src);                  // dst = dst * src.
627   void imul(Register dst, Register src, int32_t imm32);  // dst = src * imm32.
628   void imul(Register dst, Operand src, int32_t imm32);
629 
630   void inc(Register dst);
631   void inc(Operand dst);
632 
633   void lea(Register dst, Operand src);
634 
635   // Unsigned multiply instruction.
636   void mul(Register src);  // edx:eax = eax * reg.
637 
638   void neg(Register dst);
639   void neg(Operand dst);
640 
641   void not_(Register dst);
642   void not_(Operand dst);
643 
644   void or_(Register dst, int32_t imm32);
or_(Register dst,Register src)645   void or_(Register dst, Register src) { or_(dst, Operand(src)); }
646   void or_(Register dst, Operand src);
647   void or_(Operand dst, Register src);
or_(Register dst,const Immediate & imm)648   void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
649   void or_(Operand dst, const Immediate& x);
650 
651   void rcl(Register dst, uint8_t imm8);
652   void rcr(Register dst, uint8_t imm8);
653 
rol(Register dst,uint8_t imm8)654   void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); }
655   void rol(Operand dst, uint8_t imm8);
rol_cl(Register dst)656   void rol_cl(Register dst) { rol_cl(Operand(dst)); }
657   void rol_cl(Operand dst);
658 
ror(Register dst,uint8_t imm8)659   void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
660   void ror(Operand dst, uint8_t imm8);
ror_cl(Register dst)661   void ror_cl(Register dst) { ror_cl(Operand(dst)); }
662   void ror_cl(Operand dst);
663 
sar(Register dst,uint8_t imm8)664   void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
665   void sar(Operand dst, uint8_t imm8);
sar_cl(Register dst)666   void sar_cl(Register dst) { sar_cl(Operand(dst)); }
667   void sar_cl(Operand dst);
668 
sbb(Register dst,Register src)669   void sbb(Register dst, Register src) { sbb(dst, Operand(src)); }
670   void sbb(Register dst, Operand src);
671 
shl(Register dst,uint8_t imm8)672   void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
673   void shl(Operand dst, uint8_t imm8);
shl_cl(Register dst)674   void shl_cl(Register dst) { shl_cl(Operand(dst)); }
675   void shl_cl(Operand dst);
676   void shld(Register dst, Register src, uint8_t shift);
677   void shld_cl(Register dst, Register src);
678 
shr(Register dst,uint8_t imm8)679   void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
680   void shr(Operand dst, uint8_t imm8);
shr_cl(Register dst)681   void shr_cl(Register dst) { shr_cl(Operand(dst)); }
682   void shr_cl(Operand dst);
683   void shrd(Register dst, Register src, uint8_t shift);
shrd_cl(Register dst,Register src)684   void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
685   void shrd_cl(Operand dst, Register src);
686 
sub(Register dst,const Immediate & imm)687   void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
688   void sub(Operand dst, const Immediate& x);
sub(Register dst,Register src)689   void sub(Register dst, Register src) { sub(dst, Operand(src)); }
690   void sub(Register dst, Operand src);
691   void sub(Operand dst, Register src);
692   void sub_sp_32(uint32_t imm);
693 
694   void test(Register reg, const Immediate& imm);
test(Register reg0,Register reg1)695   void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
696   void test(Register reg, Operand op);
697   void test(Operand op, const Immediate& imm);
test(Operand op,Register reg)698   void test(Operand op, Register reg) { test(reg, op); }
699   void test_b(Register reg, Operand op);
700   void test_b(Register reg, Immediate imm8);
701   void test_b(Operand op, Immediate imm8);
test_b(Operand op,Register reg)702   void test_b(Operand op, Register reg) { test_b(reg, op); }
test_b(Register dst,Register src)703   void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
704   void test_w(Register reg, Operand op);
705   void test_w(Register reg, Immediate imm16);
706   void test_w(Operand op, Immediate imm16);
test_w(Operand op,Register reg)707   void test_w(Operand op, Register reg) { test_w(reg, op); }
test_w(Register dst,Register src)708   void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }
709 
710   void xor_(Register dst, int32_t imm32);
xor_(Register dst,Register src)711   void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
712   void xor_(Register dst, Operand src);
713   void xor_(Operand dst, Register src);
xor_(Register dst,const Immediate & imm)714   void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
715   void xor_(Operand dst, const Immediate& x);
716 
717   // Bit operations.
718   void bswap(Register dst);
719   void bt(Operand dst, Register src);
bts(Register dst,Register src)720   void bts(Register dst, Register src) { bts(Operand(dst), src); }
721   void bts(Operand dst, Register src);
bsr(Register dst,Register src)722   void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
723   void bsr(Register dst, Operand src);
bsf(Register dst,Register src)724   void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
725   void bsf(Register dst, Operand src);
726 
727   // Miscellaneous
728   void hlt();
729   void int3();
730   void nop();
731   void ret(int imm16);
732   void ud2();
733 
734   // Label operations & relative jumps (PPUM Appendix D)
735   //
736   // Takes a branch opcode (cc) and a label (L) and generates
737   // either a backward branch or a forward branch and links it
738   // to the label fixup chain. Usage:
739   //
740   // Label L;    // unbound label
741   // j(cc, &L);  // forward branch to unbound label
742   // bind(&L);   // bind label to the current pc
743   // j(cc, &L);  // backward branch to bound label
744   // bind(&L);   // illegal: a label may be bound only once
745   //
746   // Note: The same Label can be used for forward and backward branches
747   // but it may be bound only once.
748 
749   void bind(Label* L);  // binds an unbound label L to the current code position
750 
751   // Calls
752   void call(Label* L);
753   void call(Address entry, RelocInfo::Mode rmode);
call(Register reg)754   void call(Register reg) { call(Operand(reg)); }
755   void call(Operand adr);
756   void call(Handle<Code> code, RelocInfo::Mode rmode);
757   void wasm_call(Address address, RelocInfo::Mode rmode);
758 
759   // Jumps
760   // unconditional jump to L
761   void jmp(Label* L, Label::Distance distance = Label::kFar);
762   void jmp(Address entry, RelocInfo::Mode rmode);
jmp(Register reg)763   void jmp(Register reg) { jmp(Operand(reg)); }
764   void jmp(Operand adr);
765   void jmp(Handle<Code> code, RelocInfo::Mode rmode);
766   // Unconditional jump relative to the current address. Low-level routine,
767   // use with caution!
768   void jmp_rel(int offset);
769 
770   // Conditional jumps
771   void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
772   void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
773   void j(Condition cc, Handle<Code> code,
774          RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
775 
776   // Floating-point operations
777   void fld(int i);
778   void fstp(int i);
779 
780   void fld1();
781   void fldz();
782   void fldpi();
783   void fldln2();
784 
785   void fld_s(Operand adr);
786   void fld_d(Operand adr);
787 
788   void fstp_s(Operand adr);
789   void fst_s(Operand adr);
790   void fstp_d(Operand adr);
791   void fst_d(Operand adr);
792 
793   void fild_s(Operand adr);
794   void fild_d(Operand adr);
795 
796   void fist_s(Operand adr);
797 
798   void fistp_s(Operand adr);
799   void fistp_d(Operand adr);
800 
801   // The fisttp instructions require SSE3.
802   void fisttp_s(Operand adr);
803   void fisttp_d(Operand adr);
804 
805   void fabs();
806   void fchs();
807   void fcos();
808   void fsin();
809   void fptan();
810   void fyl2x();
811   void f2xm1();
812   void fscale();
813   void fninit();
814 
815   void fadd(int i);
816   void fadd_i(int i);
817   void fsub(int i);
818   void fsub_i(int i);
819   void fmul(int i);
820   void fmul_i(int i);
821   void fdiv(int i);
822   void fdiv_i(int i);
823 
824   void fisub_s(Operand adr);
825 
826   void faddp(int i = 1);
827   void fsubp(int i = 1);
828   void fsubrp(int i = 1);
829   void fmulp(int i = 1);
830   void fdivp(int i = 1);
831   void fprem();
832   void fprem1();
833 
834   void fxch(int i = 1);
835   void fincstp();
836   void ffree(int i = 0);
837 
838   void ftst();
839   void fucomp(int i);
840   void fucompp();
841   void fucomi(int i);
842   void fucomip();
843   void fcompp();
844   void fnstsw_ax();
845   void fwait();
846   void fnclex();
847 
848   void frndint();
849 
850   void sahf();
851   void setcc(Condition cc, Register reg);
852 
853   void cpuid();
854 
855   // SSE instructions
addss(XMMRegister dst,XMMRegister src)856   void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); }
857   void addss(XMMRegister dst, Operand src);
subss(XMMRegister dst,XMMRegister src)858   void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
859   void subss(XMMRegister dst, Operand src);
mulss(XMMRegister dst,XMMRegister src)860   void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
861   void mulss(XMMRegister dst, Operand src);
divss(XMMRegister dst,XMMRegister src)862   void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
863   void divss(XMMRegister dst, Operand src);
sqrtss(XMMRegister dst,XMMRegister src)864   void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
865   void sqrtss(XMMRegister dst, Operand src);
866 
ucomiss(XMMRegister dst,XMMRegister src)867   void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
868   void ucomiss(XMMRegister dst, Operand src);
movaps(XMMRegister dst,XMMRegister src)869   void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
870   void movaps(XMMRegister dst, Operand src);
movups(XMMRegister dst,XMMRegister src)871   void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
872   void movups(XMMRegister dst, Operand src);
873   void movups(Operand dst, XMMRegister src);
874   void shufps(XMMRegister dst, XMMRegister src, byte imm8);
875   void shufpd(XMMRegister dst, XMMRegister src, byte imm8);
876 
877   void movhlps(XMMRegister dst, XMMRegister src);
878   void movlhps(XMMRegister dst, XMMRegister src);
879   void movlps(XMMRegister dst, Operand src);
880   void movlps(Operand dst, XMMRegister src);
881   void movhps(XMMRegister dst, Operand src);
882   void movhps(Operand dst, XMMRegister src);
883 
maxss(XMMRegister dst,XMMRegister src)884   void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
885   void maxss(XMMRegister dst, Operand src);
minss(XMMRegister dst,XMMRegister src)886   void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
887   void minss(XMMRegister dst, Operand src);
888 
889   void haddps(XMMRegister dst, Operand src);
haddps(XMMRegister dst,XMMRegister src)890   void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
sqrtpd(XMMRegister dst,Operand src)891   void sqrtpd(XMMRegister dst, Operand src) {
892     sse2_instr(dst, src, 0x66, 0x0F, 0x51);
893   }
sqrtpd(XMMRegister dst,XMMRegister src)894   void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }
895 
896   void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
cmpps(XMMRegister dst,XMMRegister src,uint8_t cmp)897   void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
898     cmpps(dst, Operand(src), cmp);
899   }
900   void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
cmppd(XMMRegister dst,XMMRegister src,uint8_t cmp)901   void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
902     cmppd(dst, Operand(src), cmp);
903   }
904 
905 // Packed floating-point comparison operations.
906 #define PACKED_CMP_LIST(V) \
907   V(cmpeq, 0x0)            \
908   V(cmplt, 0x1)            \
909   V(cmple, 0x2)            \
910   V(cmpunord, 0x3)         \
911   V(cmpneq, 0x4)
912 
913 #define SSE_CMP_P(instr, imm8)                                            \
914   void instr##ps(XMMRegister dst, XMMRegister src) {                      \
915     cmpps(dst, Operand(src), imm8);                                       \
916   }                                                                       \
917   void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
918   void instr##pd(XMMRegister dst, XMMRegister src) {                      \
919     cmppd(dst, Operand(src), imm8);                                       \
920   }                                                                       \
921   void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }
922 
923   PACKED_CMP_LIST(SSE_CMP_P)
924 #undef SSE_CMP_P
925 
926   // SSE2 instructions
927   void cvttss2si(Register dst, Operand src);
cvttss2si(Register dst,XMMRegister src)928   void cvttss2si(Register dst, XMMRegister src) {
929     cvttss2si(dst, Operand(src));
930   }
931   void cvttsd2si(Register dst, Operand src);
cvttsd2si(Register dst,XMMRegister src)932   void cvttsd2si(Register dst, XMMRegister src) {
933     cvttsd2si(dst, Operand(src));
934   }
935   void cvtsd2si(Register dst, XMMRegister src);
936 
cvtsi2ss(XMMRegister dst,Register src)937   void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
938   void cvtsi2ss(XMMRegister dst, Operand src);
cvtsi2sd(XMMRegister dst,Register src)939   void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
940   void cvtsi2sd(XMMRegister dst, Operand src);
941   void cvtss2sd(XMMRegister dst, Operand src);
cvtss2sd(XMMRegister dst,XMMRegister src)942   void cvtss2sd(XMMRegister dst, XMMRegister src) {
943     cvtss2sd(dst, Operand(src));
944   }
945   void cvtdq2pd(XMMRegister dst, XMMRegister src);
946   void cvtpd2ps(XMMRegister dst, XMMRegister src);
cvttps2dq(XMMRegister dst,XMMRegister src)947   void cvttps2dq(XMMRegister dst, XMMRegister src) {
948     cvttps2dq(dst, Operand(src));
949   }
950   void cvttps2dq(XMMRegister dst, Operand src);
951   void cvttpd2dq(XMMRegister dst, XMMRegister src);
952 
ucomisd(XMMRegister dst,XMMRegister src)953   void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
954   void ucomisd(XMMRegister dst, Operand src);
955 
956   void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
957   void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
958 
movapd(XMMRegister dst,XMMRegister src)959   void movapd(XMMRegister dst, XMMRegister src) { movapd(dst, Operand(src)); }
movapd(XMMRegister dst,Operand src)960   void movapd(XMMRegister dst, Operand src) {
961     sse2_instr(dst, src, 0x66, 0x0F, 0x28);
962   }
movupd(XMMRegister dst,Operand src)963   void movupd(XMMRegister dst, Operand src) {
964     sse2_instr(dst, src, 0x66, 0x0F, 0x10);
965   }
966 
967   void movmskpd(Register dst, XMMRegister src);
968   void movmskps(Register dst, XMMRegister src);
969 
970   void pmovmskb(Register dst, XMMRegister src);
971 
972   void cmpltsd(XMMRegister dst, XMMRegister src);
973 
974   void movdqa(XMMRegister dst, Operand src);
975   void movdqa(Operand dst, XMMRegister src);
976   void movdqa(XMMRegister dst, XMMRegister src);
977   void movdqu(XMMRegister dst, Operand src);
978   void movdqu(Operand dst, XMMRegister src);
979   void movdqu(XMMRegister dst, XMMRegister src);
movdq(bool aligned,XMMRegister dst,Operand src)980   void movdq(bool aligned, XMMRegister dst, Operand src) {
981     if (aligned) {
982       movdqa(dst, src);
983     } else {
984       movdqu(dst, src);
985     }
986   }
987 
movd(XMMRegister dst,Register src)988   void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
989   void movd(XMMRegister dst, Operand src);
movd(Register dst,XMMRegister src)990   void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
991   void movd(Operand dst, XMMRegister src);
movsd(XMMRegister dst,XMMRegister src)992   void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
993   void movsd(XMMRegister dst, Operand src);
994   void movsd(Operand dst, XMMRegister src);
995 
996   void movss(XMMRegister dst, Operand src);
997   void movss(Operand dst, XMMRegister src);
movss(XMMRegister dst,XMMRegister src)998   void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
999 
1000   void extractps(Operand dst, XMMRegister src, byte imm8);
1001   void extractps(Register dst, XMMRegister src, byte imm8);
1002 
1003   void pcmpgtq(XMMRegister dst, XMMRegister src);
1004 
1005   void psllw(XMMRegister reg, uint8_t shift);
1006   void pslld(XMMRegister reg, uint8_t shift);
1007   void psrlw(XMMRegister reg, uint8_t shift);
1008   void psrld(XMMRegister reg, uint8_t shift);
1009   void psraw(XMMRegister reg, uint8_t shift);
1010   void psrad(XMMRegister reg, uint8_t shift);
1011   void psllq(XMMRegister reg, uint8_t shift);
1012   void psrlq(XMMRegister reg, uint8_t shift);
1013 
pshufhw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1014   void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1015     pshufhw(dst, Operand(src), shuffle);
1016   }
1017   void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
pshuflw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1018   void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1019     pshuflw(dst, Operand(src), shuffle);
1020   }
1021   void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
pshufd(XMMRegister dst,XMMRegister src,uint8_t shuffle)1022   void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1023     pshufd(dst, Operand(src), shuffle);
1024   }
1025   void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1026 
pblendw(XMMRegister dst,XMMRegister src,uint8_t mask)1027   void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
1028     pblendw(dst, Operand(src), mask);
1029   }
1030   void pblendw(XMMRegister dst, Operand src, uint8_t mask);
1031 
palignr(XMMRegister dst,XMMRegister src,uint8_t mask)1032   void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
1033     palignr(dst, Operand(src), mask);
1034   }
1035   void palignr(XMMRegister dst, Operand src, uint8_t mask);
1036 
pextrb(Register dst,XMMRegister src,uint8_t offset)1037   void pextrb(Register dst, XMMRegister src, uint8_t offset) {
1038     pextrb(Operand(dst), src, offset);
1039   }
1040   void pextrb(Operand dst, XMMRegister src, uint8_t offset);
1041   // SSE3 instructions
1042   void movddup(XMMRegister dst, Operand src);
movddup(XMMRegister dst,XMMRegister src)1043   void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
1044   void movshdup(XMMRegister dst, XMMRegister src);
1045 
1046   // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
pextrw(Register dst,XMMRegister src,uint8_t offset)1047   void pextrw(Register dst, XMMRegister src, uint8_t offset) {
1048     pextrw(Operand(dst), src, offset);
1049   }
1050   void pextrw(Operand dst, XMMRegister src, uint8_t offset);
pextrd(Register dst,XMMRegister src,uint8_t offset)1051   void pextrd(Register dst, XMMRegister src, uint8_t offset) {
1052     pextrd(Operand(dst), src, offset);
1053   }
1054   void pextrd(Operand dst, XMMRegister src, uint8_t offset);
1055 
insertps(XMMRegister dst,XMMRegister src,uint8_t offset)1056   void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
1057     insertps(dst, Operand(src), offset);
1058   }
1059   void insertps(XMMRegister dst, Operand src, uint8_t offset);
pinsrb(XMMRegister dst,Register src,uint8_t offset)1060   void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
1061     pinsrb(dst, Operand(src), offset);
1062   }
1063   void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
pinsrw(XMMRegister dst,Register src,uint8_t offset)1064   void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
1065     pinsrw(dst, Operand(src), offset);
1066   }
1067   void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
pinsrd(XMMRegister dst,Register src,uint8_t offset)1068   void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
1069     pinsrd(dst, Operand(src), offset);
1070   }
1071   void pinsrd(XMMRegister dst, Operand src, uint8_t offset);
1072 
1073   void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
1074   void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1075 
1076   // AVX instructions
vaddss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1077   void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1078     vaddss(dst, src1, Operand(src2));
1079   }
vaddss(XMMRegister dst,XMMRegister src1,Operand src2)1080   void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
1081     vss(0x58, dst, src1, src2);
1082   }
vsubss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1083   void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1084     vsubss(dst, src1, Operand(src2));
1085   }
vsubss(XMMRegister dst,XMMRegister src1,Operand src2)1086   void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
1087     vss(0x5c, dst, src1, src2);
1088   }
vmulss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1089   void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1090     vmulss(dst, src1, Operand(src2));
1091   }
vmulss(XMMRegister dst,XMMRegister src1,Operand src2)1092   void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
1093     vss(0x59, dst, src1, src2);
1094   }
vdivss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1095   void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1096     vdivss(dst, src1, Operand(src2));
1097   }
vdivss(XMMRegister dst,XMMRegister src1,Operand src2)1098   void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
1099     vss(0x5e, dst, src1, src2);
1100   }
vmaxss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1101   void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1102     vmaxss(dst, src1, Operand(src2));
1103   }
vmaxss(XMMRegister dst,XMMRegister src1,Operand src2)1104   void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
1105     vss(0x5f, dst, src1, src2);
1106   }
vminss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1107   void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1108     vminss(dst, src1, Operand(src2));
1109   }
vminss(XMMRegister dst,XMMRegister src1,Operand src2)1110   void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
1111     vss(0x5d, dst, src1, src2);
1112   }
vsqrtss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1113   void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1114     vsqrtss(dst, src1, Operand(src2));
1115   }
vsqrtss(XMMRegister dst,XMMRegister src1,Operand src2)1116   void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
1117     vss(0x51, dst, src1, src2);
1118   }
1119   void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1120 
vhaddps(XMMRegister dst,XMMRegister src1,XMMRegister src2)1121   void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1122     vhaddps(dst, src1, Operand(src2));
1123   }
vhaddps(XMMRegister dst,XMMRegister src1,Operand src2)1124   void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
1125     vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
1126   }
vsqrtpd(XMMRegister dst,XMMRegister src)1127   void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
vsqrtpd(XMMRegister dst,Operand src)1128   void vsqrtpd(XMMRegister dst, Operand src) {
1129     vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
1130   }
vmovss(Operand dst,XMMRegister src)1131   void vmovss(Operand dst, XMMRegister src) {
1132     vinstr(0x11, src, xmm0, dst, kF3, k0F, kWIG);
1133   }
vmovss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1134   void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1135     vinstr(0x10, dst, src1, src2, kF3, k0F, kWIG);
1136   }
vmovss(XMMRegister dst,Operand src)1137   void vmovss(XMMRegister dst, Operand src) {
1138     vinstr(0x10, dst, xmm0, src, kF3, k0F, kWIG);
1139   }
vmovsd(Operand dst,XMMRegister src)1140   void vmovsd(Operand dst, XMMRegister src) {
1141     vinstr(0x11, src, xmm0, dst, kF2, k0F, kWIG);
1142   }
vmovsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1143   void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1144     vinstr(0x10, dst, src1, src2, kF2, k0F, kWIG);
1145   }
vmovsd(XMMRegister dst,Operand src)1146   void vmovsd(XMMRegister dst, Operand src) {
1147     vinstr(0x10, dst, xmm0, src, kF2, k0F, kWIG);
1148   }
1149 
1150   void vextractps(Operand dst, XMMRegister src, byte imm8);
1151 
1152   void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2);
1153 
vmovaps(XMMRegister dst,XMMRegister src)1154   void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
vmovaps(XMMRegister dst,Operand src)1155   void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
vmovapd(XMMRegister dst,XMMRegister src)1156   void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
vmovapd(XMMRegister dst,Operand src)1157   void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
vmovups(Operand dst,XMMRegister src)1158   void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
vmovups(XMMRegister dst,XMMRegister src)1159   void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
vmovups(XMMRegister dst,Operand src)1160   void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
vmovupd(XMMRegister dst,Operand src)1161   void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
vshufps(XMMRegister dst,XMMRegister src1,XMMRegister src2,byte imm8)1162   void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
1163     vshufps(dst, src1, Operand(src2), imm8);
1164   }
1165   void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
vshufpd(XMMRegister dst,XMMRegister src1,XMMRegister src2,byte imm8)1166   void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
1167     vshufpd(dst, src1, Operand(src2), imm8);
1168   }
1169   void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
1170 
1171   void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
1172   void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
1173   void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
1174   void vmovlps(Operand dst, XMMRegister src);
1175   void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
1176   void vmovhps(Operand dst, XMMRegister src);
1177 
1178   void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1179   void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1180   void vpsllq(XMMRegister dst, XMMRegister src, uint8_t imm8);
1181   void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1182   void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1183   void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1184   void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
1185   void vpsrlq(XMMRegister dst, XMMRegister src, uint8_t imm8);
1186 
vpshufhw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1187   void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1188     vpshufhw(dst, Operand(src), shuffle);
1189   }
1190   void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
vpshuflw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1191   void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1192     vpshuflw(dst, Operand(src), shuffle);
1193   }
1194   void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
vpshufd(XMMRegister dst,XMMRegister src,uint8_t shuffle)1195   void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1196     vpshufd(dst, Operand(src), shuffle);
1197   }
1198   void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1199 
1200   void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1201                  XMMRegister mask);
1202   void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1203                  XMMRegister mask);
1204   void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1205                  XMMRegister mask);
1206 
vpblendw(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t mask)1207   void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1208                 uint8_t mask) {
1209     vpblendw(dst, src1, Operand(src2), mask);
1210   }
1211   void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1212 
vpalignr(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t mask)1213   void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1214                 uint8_t mask) {
1215     vpalignr(dst, src1, Operand(src2), mask);
1216   }
1217   void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1218 
vpextrb(Register dst,XMMRegister src,uint8_t offset)1219   void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
1220     vpextrb(Operand(dst), src, offset);
1221   }
1222   void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
vpextrw(Register dst,XMMRegister src,uint8_t offset)1223   void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
1224     vpextrw(Operand(dst), src, offset);
1225   }
1226   void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
vpextrd(Register dst,XMMRegister src,uint8_t offset)1227   void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
1228     vpextrd(Operand(dst), src, offset);
1229   }
1230   void vpextrd(Operand dst, XMMRegister src, uint8_t offset);
1231 
vinsertps(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t offset)1232   void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1233                  uint8_t offset) {
1234     vinsertps(dst, src1, Operand(src2), offset);
1235   }
1236   void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
1237                  uint8_t offset);
vpinsrb(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1238   void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
1239                uint8_t offset) {
1240     vpinsrb(dst, src1, Operand(src2), offset);
1241   }
1242   void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
vpinsrw(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1243   void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
1244                uint8_t offset) {
1245     vpinsrw(dst, src1, Operand(src2), offset);
1246   }
1247   void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
vpinsrd(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1248   void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
1249                uint8_t offset) {
1250     vpinsrd(dst, src1, Operand(src2), offset);
1251   }
1252   void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
1253 
1254   void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1255                 RoundingMode mode);
1256   void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1257                 RoundingMode mode);
1258   void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
1259   void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1260 
vcvtdq2pd(XMMRegister dst,XMMRegister src)1261   void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
1262     vinstr(0xE6, dst, xmm0, src, kF3, k0F, kWIG);
1263   }
vcvtpd2ps(XMMRegister dst,XMMRegister src)1264   void vcvtpd2ps(XMMRegister dst, XMMRegister src) {
1265     vinstr(0x5A, dst, xmm0, src, k66, k0F, kWIG);
1266   }
vcvttps2dq(XMMRegister dst,XMMRegister src)1267   void vcvttps2dq(XMMRegister dst, XMMRegister src) {
1268     vcvttps2dq(dst, Operand(src));
1269   }
vcvttps2dq(XMMRegister dst,Operand src)1270   void vcvttps2dq(XMMRegister dst, Operand src) {
1271     vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
1272   }
vcvttpd2dq(XMMRegister dst,XMMRegister src)1273   void vcvttpd2dq(XMMRegister dst, XMMRegister src) {
1274     vinstr(0xE6, dst, xmm0, src, k66, k0F, kWIG);
1275   }
vcvttsd2si(Register dst,XMMRegister src)1276   void vcvttsd2si(Register dst, XMMRegister src) {
1277     XMMRegister idst = XMMRegister::from_code(dst.code());
1278     vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
1279   }
vcvttsd2si(Register dst,Operand src)1280   void vcvttsd2si(Register dst, Operand src) {
1281     XMMRegister idst = XMMRegister::from_code(dst.code());
1282     vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
1283   }
vcvtss2sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1284   void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1285     vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1286   }
vcvtss2sd(XMMRegister dst,XMMRegister src1,Operand src2)1287   void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1288     vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1289   }
vcvttss2si(Register dst,XMMRegister src)1290   void vcvttss2si(Register dst, XMMRegister src) {
1291     XMMRegister idst = XMMRegister::from_code(dst.code());
1292     vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
1293   }
vcvttss2si(Register dst,Operand src)1294   void vcvttss2si(Register dst, Operand src) {
1295     XMMRegister idst = XMMRegister::from_code(dst.code());
1296     vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
1297   }
1298 
vmovddup(XMMRegister dst,Operand src)1299   void vmovddup(XMMRegister dst, Operand src) {
1300     vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
1301   }
vmovddup(XMMRegister dst,XMMRegister src)1302   void vmovddup(XMMRegister dst, XMMRegister src) {
1303     vmovddup(dst, Operand(src));
1304   }
vmovshdup(XMMRegister dst,XMMRegister src)1305   void vmovshdup(XMMRegister dst, XMMRegister src) {
1306     vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG);
1307   }
vbroadcastss(XMMRegister dst,XMMRegister src)1308   void vbroadcastss(XMMRegister dst, XMMRegister src) {
1309     vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2);
1310   }
vbroadcastss(XMMRegister dst,Operand src)1311   void vbroadcastss(XMMRegister dst, Operand src) {
1312     vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
1313   }
vmovdqa(XMMRegister dst,Operand src)1314   void vmovdqa(XMMRegister dst, Operand src) {
1315     vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
1316   }
vmovdqa(XMMRegister dst,XMMRegister src)1317   void vmovdqa(XMMRegister dst, XMMRegister src) {
1318     vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
1319   }
vmovdqu(XMMRegister dst,Operand src)1320   void vmovdqu(XMMRegister dst, Operand src) {
1321     vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
1322   }
vmovdqu(Operand dst,XMMRegister src)1323   void vmovdqu(Operand dst, XMMRegister src) {
1324     vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
1325   }
vmovd(XMMRegister dst,Register src)1326   void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
vmovd(XMMRegister dst,Operand src)1327   void vmovd(XMMRegister dst, Operand src) {
1328     vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
1329   }
vmovd(Register dst,XMMRegister src)1330   void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
vmovd(Operand dst,XMMRegister src)1331   void vmovd(Operand dst, XMMRegister src) {
1332     vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
1333   }
1334 
1335   void vmovmskpd(Register dst, XMMRegister src);
1336   void vmovmskps(Register dst, XMMRegister src);
1337 
1338   void vpmovmskb(Register dst, XMMRegister src);
1339 
vucomisd(XMMRegister dst,XMMRegister src)1340   void vucomisd(XMMRegister dst, XMMRegister src) {
1341     vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
1342   }
vucomisd(XMMRegister dst,Operand src)1343   void vucomisd(XMMRegister dst, Operand src) {
1344     vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
1345   }
vucomiss(XMMRegister dst,XMMRegister src)1346   void vucomiss(XMMRegister dst, XMMRegister src) {
1347     vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
1348   }
vucomiss(XMMRegister dst,Operand src)1349   void vucomiss(XMMRegister dst, Operand src) {
1350     vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
1351   }
1352 
  // BMI1/BMI2 (bit-manipulation) instructions. Register-register forms simply
  // forward to the Operand forms; the bmi1()/bmi2() helpers emit the
  // VEX-encoded instruction bytes.
  void andn(Register dst, Register src1, Register src2) {
    andn(dst, src1, Operand(src2));
  }
  void andn(Register dst, Register src1, Operand src2) {
    bmi1(0xf2, dst, src1, src2);
  }
  void bextr(Register dst, Register src1, Register src2) {
    bextr(dst, Operand(src1), src2);
  }
  void bextr(Register dst, Operand src1, Register src2) {
    bmi1(0xf7, dst, src2, src1);
  }
  // blsi/blsmsk/blsr share opcode 0xf3. The ebx/edx/ecx arguments are not
  // real register operands: their register codes (3, 2, 1) supply the
  // ModR/M reg-field opcode extension (/3, /2, /1 per the Intel SDM), and
  // the destination travels in VEX.vvvv via bmi1()'s vreg parameter.
  void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
  void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
  void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
  void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
  void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
  void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
  // Count trailing zeros, leading zeros, and set bits.
  void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
  void tzcnt(Register dst, Operand src);

  void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
  void lzcnt(Register dst, Operand src);

  void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
  void popcnt(Register dst, Operand src);

  // BMI2: zero-high-bits, flagless multiply, parallel deposit/extract,
  // and flagless shifts/rotate. Note the operand order handed to bmi2():
  // for bzhi/sarx/shlx/shrx the shift-count register goes in vreg.
  void bzhi(Register dst, Register src1, Register src2) {
    bzhi(dst, Operand(src1), src2);
  }
  void bzhi(Register dst, Operand src1, Register src2) {
    bmi2(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulx(Register dst1, Register dst2, Register src) {
    mulx(dst1, dst2, Operand(src));
  }
  void mulx(Register dst1, Register dst2, Operand src) {
    bmi2(kF2, 0xf6, dst1, dst2, src);
  }
  void pdep(Register dst, Register src1, Register src2) {
    pdep(dst, src1, Operand(src2));
  }
  void pdep(Register dst, Register src1, Operand src2) {
    bmi2(kF2, 0xf5, dst, src1, src2);
  }
  void pext(Register dst, Register src1, Register src2) {
    pext(dst, src1, Operand(src2));
  }
  void pext(Register dst, Register src1, Operand src2) {
    bmi2(kF3, 0xf5, dst, src1, src2);
  }
  void sarx(Register dst, Register src1, Register src2) {
    sarx(dst, Operand(src1), src2);
  }
  void sarx(Register dst, Operand src1, Register src2) {
    bmi2(kF3, 0xf7, dst, src2, src1);
  }
  void shlx(Register dst, Register src1, Register src2) {
    shlx(dst, Operand(src1), src2);
  }
  void shlx(Register dst, Operand src1, Register src2) {
    bmi2(k66, 0xf7, dst, src2, src1);
  }
  void shrx(Register dst, Register src1, Register src2) {
    shrx(dst, Operand(src1), src2);
  }
  void shrx(Register dst, Operand src1, Register src2) {
    bmi2(kF2, 0xf7, dst, src2, src1);
  }
  // Rotate right by an immediate bit count.
  void rorx(Register dst, Register src, byte imm8) {
    rorx(dst, Operand(src), imm8);
  }
  void rorx(Register dst, Operand src, byte imm8);
1427 
  // Implementation of packed single-precision floating-point SSE instructions.
  void ps(byte op, XMMRegister dst, Operand src);
  // Implementation of packed double-precision floating-point SSE instructions.
  void pd(byte op, XMMRegister dst, Operand src);

// Packed-float arithmetic/logic ops, keyed by their shared SSE opcode byte.
// Each entry expands to four methods below (ps/pd x register/memory source).
#define PACKED_OP_LIST(V) \
  V(unpckl, 0x14)         \
  V(and, 0x54)            \
  V(andn, 0x55)           \
  V(or, 0x56)             \
  V(xor, 0x57)            \
  V(add, 0x58)            \
  V(mul, 0x59)            \
  V(sub, 0x5c)            \
  V(min, 0x5d)            \
  V(div, 0x5e)            \
  V(max, 0x5f)

// Declares the two-operand SSE forms, e.g. addps/addpd.
#define SSE_PACKED_OP_DECLARE(name, opcode)                             \
  void name##ps(XMMRegister dst, XMMRegister src) {                     \
    ps(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##ps(XMMRegister dst, Operand src) { ps(opcode, dst, src); } \
  void name##pd(XMMRegister dst, XMMRegister src) {                     \
    pd(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##pd(XMMRegister dst, Operand src) { pd(opcode, dst, src); }

  PACKED_OP_LIST(SSE_PACKED_OP_DECLARE)
#undef SSE_PACKED_OP_DECLARE

// Declares the three-operand AVX forms, e.g. vaddps/vaddpd.
#define AVX_PACKED_OP_DECLARE(name, opcode)                               \
  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(opcode, dst, src1, src2);                                         \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vpd(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vpd(opcode, dst, src1, src2);                                         \
  }

  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
#undef AVX_PACKED_OP_DECLARE
#undef PACKED_OP_LIST
1476 
  // Emitters for the generated AVX packed ops above.
  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  // AVX packed compares; cmp is the immediate comparison predicate.
  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);

// Declares vcmp<pred>ps/vcmp<pred>pd wrappers that bake in the predicate
// immediate, e.g. vcmpeqps(dst, a, b).
#define AVX_CMP_P(instr, imm8)                                             \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmpps(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmppd(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  PACKED_CMP_LIST(AVX_CMP_P)
  // vcmpgeps/vcmpgepd only in AVX.
  AVX_CMP_P(cmpge, 0xd)
#undef AVX_CMP_P
#undef PACKED_CMP_LIST
1502 
// Other SSE and AVX instructions

// Unary SSE ops plus their AVX counterparts (SSE form: two operands; the AVX
// form passes xmm0 as the unused VEX.vvvv operand).
#define DECLARE_SSE_UNOP_AND_AVX(instruction, escape, opcode)       \
  void instruction(XMMRegister dst, XMMRegister src) {              \
    instruction(dst, Operand(src));                                 \
  }                                                                 \
  void instruction(XMMRegister dst, Operand src) {                  \
    sse_instr(dst, src, 0x##escape, 0x##opcode);                    \
  }                                                                 \
  void v##instruction(XMMRegister dst, XMMRegister src) {           \
    v##instruction(dst, Operand(src));                              \
  }                                                                 \
  void v##instruction(XMMRegister dst, Operand src) {               \
    vinstr(0x##opcode, dst, xmm0, src, kNoPrefix, k##escape, kWIG); \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_UNOP_AND_AVX)
#undef DECLARE_SSE_UNOP_AND_AVX

// Two-operand SSE2 instructions (prefix + escape + opcode bytes).
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    instruction(dst, Operand(src));                                   \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

// Three-operand AVX forms of the SSE2 instructions above.
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    v##instruction(dst, src1, Operand(src2));                                \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

// SSSE3 instructions use two escape bytes (e.g. 0F 38).
#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    instruction(dst, Operand(src));                                          \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

// SSE4 instructions, plus the three blend ops that are not in the lists.
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    instruction(dst, Operand(src));                                         \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
#undef DECLARE_SSE4_INSTRUCTION

// Three-operand AVX forms of the SSSE3/SSE4 instructions.
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    v##instruction(dst, src1, Operand(src2));                                 \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

// Two-operand (reg, reg/mem) AVX forms of the unary SSSE3/SSE4 instructions.
#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
                                        opcode)                                \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    v##instruction(dst, Operand(src));                                         \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0);   \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION

  // AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)           \
  void instr(XMMRegister dst, XMMRegister src) {                            \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }                                                                         \
  void instr(XMMRegister dst, Operand src) {                                \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION

// FMA3 fused multiply-add instructions; the list supplies vector length,
// prefix, escape bytes, W extension and opcode.
#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA
1624 
  // Prefetch src position into cache level.
  // Level 1, 2 or 3 specifies the CPU cache level; level 0 specifies a
  // non-temporal prefetch.
  void prefetch(Operand src, int level);
  // TODO(lrn): Need SFENCE for movnt?
1630 
1631   // Check the code size generated from label to here.
SizeOfCodeGeneratedSince(Label * label)1632   int SizeOfCodeGeneratedSince(Label* label) {
1633     return pc_offset() - label->pos();
1634   }
1635 
  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  // Writes a single byte or word of data in the code stream.  Used for
  // inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  // Pointer-sized data: on ia32 a pointer is 32 bits, so forward to dd().
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    dd(data, rmode);
  }
  // Emits data referring to the given label's position.
  void dd(Label* label);
1650 
  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information. Note that code grows upward
  // from buffer_start_ while relocation info grows downward from the end,
  // so the free space is the span between pc_ and the reloc writer.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const { return reloc_info_writer.pos() - pc_; }

  // Checks whether the code at addr is a nop (see implementation).
  static bool IsNop(Address addr);

  // Number of bytes of relocation info written so far (it occupies the tail
  // of the buffer, from reloc_info_writer.pos() to the buffer end).
  int relocation_writer_size() {
    return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos();
  }

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  // Raw byte access to the code buffer, e.g. for patching emitted code.
  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }
1672 
 protected:
  // Emit the operand bytes for SSE instructions, for each combination of
  // XMM/general register and memory operand.
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst, Register src);

  // Address of the byte at offset pos within the code buffer.
  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }

 private:
  // Unaligned 32-bit read/write at a buffer offset; displacements and
  // immediates in the instruction stream are not guaranteed to be aligned.
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }
1690 
  // code emission
  void GrowBuffer();
  inline void emit(uint32_t x);
  inline void emit(Handle<HeapObject> handle);
  inline void emit(uint32_t x, RelocInfo::Mode rmode);
  inline void emit(Handle<Code> code, RelocInfo::Mode rmode);
  inline void emit(const Immediate& x);
  inline void emit_b(Immediate x);
  inline void emit_w(const Immediate& x);
  inline void emit_q(uint64_t x);

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // instruction generation
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
  // with a given destination expression and an immediate operand.  It attempts
  // to use the shortest encoding possible.
  // sel specifies the /n in the modrm byte (see the Intel PRM).
  void emit_arith(int sel, Operand dst, const Immediate& x);

  // Emit the ModR/M (and SIB/displacement) bytes for an operand; the first
  // parameter supplies the reg field.
  void emit_operand(int code, Operand adr);
  void emit_operand(Register reg, Operand adr);
  void emit_operand(XMMRegister reg, Operand adr);

  void emit_label(Label* label);

  void emit_farith(int b1, int b2, int i);

  // Emit vex prefix. The enum values below are the bit patterns used in the
  // VEX prefix fields (see the Intel SDM, VEX prefix encoding).
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);

  // labels
  void print(const Label* L);
  void bind_to(Label* L, int pos);

  // displacements
  inline Displacement disp_at(Label* L);
  inline void disp_at_put(Label* L, Displacement disp);
  inline void emit_disp(Label* L, Displacement::Type type);
  inline void emit_near_disp(Label* L);

  // Shared emitters for the SSE/SSSE3/SSE4 instruction families declared
  // above; parameters are the raw prefix/escape/opcode bytes.
  void sse_instr(XMMRegister dst, Operand src, byte prefix, byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  // VEX-encoded instruction emitters; the CpuFeature parameter lets callers
  // require AVX2/FMA3 instead of the default AVX.
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  // Most BMI instructions are similar.
  void bmi1(byte op, Register reg, Register vreg, Operand rm);
  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // record the position of jmp/jcc instruction
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  // EnsureSpace calls buffer_overflow()/GrowBuffer() directly.
  friend class EnsureSpace;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Variables for this instance of assembler
  int farjmp_num_ = 0;                                 // number of far jumps seen
  std::deque<int> farjmp_positions_;                   // buffer positions of far jumps
  std::map<Label*, std::vector<int>> label_farjmp_maps_;  // far jumps per label
1793 };
1794 
// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    // Grow the buffer up front so that subsequent emits cannot overflow.
    if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler->available_space();
#endif
  }

#ifdef DEBUG
  // Debug-only check: no more than kGap bytes (the headroom guaranteed by
  // buffer_overflow()) may have been emitted while this scope was live.
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* const assembler_;
#ifdef DEBUG
  int space_before_;  // available_space() captured at construction
#endif
};
1821 
1822 }  // namespace internal
1823 }  // namespace v8
1824 
1825 #endif  // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
1826