• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 //
15 // - Neither the name of Sun Microsystems or the names of contributors may
16 // be used to endorse or promote products derived from this software without
17 // specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // The original source code covered by the license above has been
32 // modified significantly by Google Inc.
33 // Copyright 2011 the V8 project authors. All rights reserved.
34 
35 // A light-weight IA32 Assembler.
36 
37 #ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
38 #define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
39 
40 #include <deque>
41 #include <memory>
42 
43 #include "src/codegen/assembler.h"
44 #include "src/codegen/ia32/constants-ia32.h"
45 #include "src/codegen/ia32/register-ia32.h"
46 #include "src/codegen/ia32/sse-instr.h"
47 #include "src/codegen/label.h"
48 #include "src/execution/isolate.h"
49 #include "src/objects/smi.h"
50 #include "src/utils/utils.h"
51 
52 namespace v8 {
53 namespace internal {
54 
55 class SafepointTableBuilder;
56 
// Condition codes. Apart from no_condition, the codes come in pairs that
// differ only in their lowest bit, and each pair holds a condition and its
// negation. NegateCondition() below relies on that layout.
enum Condition {
  // Every negative value is treated as "no condition".
  no_condition = -1,

  overflow = 0x0,
  no_overflow = 0x1,
  below = 0x2,
  above_equal = 0x3,
  equal = 0x4,
  not_equal = 0x5,
  below_equal = 0x6,
  above = 0x7,
  negative = 0x8,
  positive = 0x9,
  parity_even = 0xA,
  parity_odd = 0xB,
  less = 0xC,
  greater_equal = 0xD,
  less_equal = 0xE,
  greater = 0xF,

  // Alternative names for some of the codes above.
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};

// Computes the logical negation of cc by flipping the lowest bit of its code.
// Applied to the default no_condition (-1) this produces -2, which is a
// different value but still negative. Code that detects "no condition" with
// `condition < 0` therefore keeps working as expected.
inline Condition NegateCondition(Condition cc) {
  const int flipped = static_cast<int>(cc) ^ 1;
  return static_cast<Condition>(flipped);
}
94 
// Floating-point rounding modes, numbered 0 through 3.
enum RoundingMode {
  kRoundToNearest = 0,
  kRoundDown = 1,
  kRoundUp = 2,
  kRoundToZero = 3
};
101 
102 // -----------------------------------------------------------------------------
103 // Machine instruction Immediates
104 
105 class Immediate {
106  public:
107   // Calls where x is an Address (uintptr_t) resolve to this overload.
108   inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NONE) {
109     value_.immediate = x;
110     rmode_ = rmode;
111   }
Immediate(const ExternalReference & ext)112   inline explicit Immediate(const ExternalReference& ext)
113       : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
Immediate(Handle<HeapObject> handle)114   inline explicit Immediate(Handle<HeapObject> handle)
115       : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {}
Immediate(Smi value)116   inline explicit Immediate(Smi value)
117       : Immediate(static_cast<intptr_t>(value.ptr())) {}
118 
119   static Immediate EmbeddedNumber(double number);  // Smi or HeapNumber.
120   static Immediate EmbeddedStringConstant(const StringConstantBase* str);
121 
CodeRelativeOffset(Label * label)122   static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); }
123 
is_heap_object_request()124   bool is_heap_object_request() const {
125     DCHECK_IMPLIES(is_heap_object_request_,
126                    rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
127                        rmode_ == RelocInfo::CODE_TARGET);
128     return is_heap_object_request_;
129   }
130 
heap_object_request()131   HeapObjectRequest heap_object_request() const {
132     DCHECK(is_heap_object_request());
133     return value_.heap_object_request;
134   }
135 
immediate()136   int immediate() const {
137     DCHECK(!is_heap_object_request());
138     return value_.immediate;
139   }
140 
is_embedded_object()141   bool is_embedded_object() const {
142     return !is_heap_object_request() &&
143            rmode() == RelocInfo::FULL_EMBEDDED_OBJECT;
144   }
145 
embedded_object()146   Handle<HeapObject> embedded_object() const {
147     return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
148   }
149 
is_external_reference()150   bool is_external_reference() const {
151     return rmode() == RelocInfo::EXTERNAL_REFERENCE;
152   }
153 
external_reference()154   ExternalReference external_reference() const {
155     DCHECK(is_external_reference());
156     return bit_cast<ExternalReference>(immediate());
157   }
158 
is_zero()159   bool is_zero() const { return RelocInfo::IsNone(rmode_) && immediate() == 0; }
is_int8()160   bool is_int8() const {
161     return RelocInfo::IsNone(rmode_) && i::is_int8(immediate());
162   }
is_uint8()163   bool is_uint8() const {
164     return RelocInfo::IsNone(rmode_) && i::is_uint8(immediate());
165   }
is_int16()166   bool is_int16() const {
167     return RelocInfo::IsNone(rmode_) && i::is_int16(immediate());
168   }
169 
is_uint16()170   bool is_uint16() const {
171     return RelocInfo::IsNone(rmode_) && i::is_uint16(immediate());
172   }
173 
rmode()174   RelocInfo::Mode rmode() const { return rmode_; }
175 
176  private:
Immediate(Label * value)177   inline explicit Immediate(Label* value) {
178     value_.immediate = reinterpret_cast<int32_t>(value);
179     rmode_ = RelocInfo::INTERNAL_REFERENCE;
180   }
181 
182   union Value {
Value()183     Value() {}
184     HeapObjectRequest heap_object_request;
185     int immediate;
186   } value_;
187   bool is_heap_object_request_ = false;
188   RelocInfo::Mode rmode_;
189 
190   friend class Operand;
191   friend class Assembler;
192   friend class MacroAssembler;
193 };
194 
195 // -----------------------------------------------------------------------------
196 // Machine instruction Operands
197 
// Scale applied to an index register. The enumerator value is the base-2
// logarithm of the multiplier named by the enumerator.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,

  // Named aliases for the sizes used on this architecture.
  times_int_size = times_4,

  times_half_system_pointer_size = times_2,
  times_system_pointer_size = times_4,

  times_tagged_size = times_4,
};
210 
211 class V8_EXPORT_PRIVATE Operand {
212  public:
213   // reg
Operand(Register reg)214   V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }
215 
216   // XMM reg
Operand(XMMRegister xmm_reg)217   V8_INLINE explicit Operand(XMMRegister xmm_reg) {
218     Register reg = Register::from_code(xmm_reg.code());
219     set_modrm(3, reg);
220   }
221 
222   // [disp/r]
Operand(int32_t disp,RelocInfo::Mode rmode)223   V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
224     set_modrm(0, ebp);
225     set_dispr(disp, rmode);
226   }
227 
228   // [disp/r]
Operand(Immediate imm)229   V8_INLINE explicit Operand(Immediate imm) {
230     set_modrm(0, ebp);
231     set_dispr(imm.immediate(), imm.rmode_);
232   }
233 
234   // [base + disp/r]
235   explicit Operand(Register base, int32_t disp,
236                    RelocInfo::Mode rmode = RelocInfo::NONE);
237 
238   // [base + index*scale + disp/r]
239   explicit Operand(Register base, Register index, ScaleFactor scale,
240                    int32_t disp, RelocInfo::Mode rmode = RelocInfo::NONE);
241 
242   // [index*scale + disp/r]
243   explicit Operand(Register index, ScaleFactor scale, int32_t disp,
244                    RelocInfo::Mode rmode = RelocInfo::NONE);
245 
JumpTable(Register index,ScaleFactor scale,Label * table)246   static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
247     return Operand(index, scale, reinterpret_cast<int32_t>(table),
248                    RelocInfo::INTERNAL_REFERENCE);
249   }
250 
ForRegisterPlusImmediate(Register base,Immediate imm)251   static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
252     return Operand(base, imm.value_.immediate, imm.rmode_);
253   }
254 
255   // Returns true if this Operand is a wrapper for the specified register.
is_reg(Register reg)256   bool is_reg(Register reg) const { return is_reg(reg.code()); }
is_reg(XMMRegister reg)257   bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }
258 
259   // Returns true if this Operand is a wrapper for one register.
260   bool is_reg_only() const;
261 
262   // Asserts that this Operand is a wrapper for one register and returns the
263   // register.
264   Register reg() const;
265 
266  private:
267   // Set the ModRM byte without an encoded 'reg' register. The
268   // register is encoded later as part of the emit_operand operation.
set_modrm(int mod,Register rm)269   inline void set_modrm(int mod, Register rm) {
270     DCHECK_EQ(mod & -4, 0);
271     buf_[0] = mod << 6 | rm.code();
272     len_ = 1;
273   }
274 
275   inline void set_sib(ScaleFactor scale, Register index, Register base);
276   inline void set_disp8(int8_t disp);
set_dispr(int32_t disp,RelocInfo::Mode rmode)277   inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
278     DCHECK(len_ == 1 || len_ == 2);
279     Address p = reinterpret_cast<Address>(&buf_[len_]);
280     WriteUnalignedValue(p, disp);
281     len_ += sizeof(int32_t);
282     rmode_ = rmode;
283   }
284 
is_reg(int reg_code)285   inline bool is_reg(int reg_code) const {
286     return ((buf_[0] & 0xF8) == 0xC0)  // addressing mode is register only.
287            && ((buf_[0] & 0x07) == reg_code);  // register codes match.
288   }
289 
290   byte buf_[6];
291   // The number of bytes in buf_.
292   uint8_t len_ = 0;
293   // Only valid if len_ > 4.
294   RelocInfo::Mode rmode_ = RelocInfo::NONE;
295 
296   // TODO(clemensb): Get rid of this friendship, or make Operand immutable.
297   friend class Assembler;
298 };
299 ASSERT_TRIVIALLY_COPYABLE(Operand);
300 static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
301               "Operand must be small enough to pass it by value");
302 
303 // -----------------------------------------------------------------------------
304 // A Displacement describes the 32bit immediate field of an instruction which
305 // may be used together with a Label in order to refer to a yet unknown code
306 // position. Displacements stored in the instruction stream are used to describe
307 // the instruction and to chain a list of instructions using the same Label.
308 // A Displacement contains 2 different fields:
309 //
310 // next field: position of next displacement in the chain (0 = end of list)
311 // type field: instruction type
312 //
313 // A next value of null (0) indicates the end of a chain (note that there can
314 // be no displacement at position zero, because there is always at least one
315 // instruction byte before the displacement).
316 //
317 // Displacement _data field layout
318 //
319 // |31.....2|1......0|
320 // |  next  |  type  |
321 
322 class Displacement {
323  public:
324   enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };
325 
data()326   int data() const { return data_; }
type()327   Type type() const { return TypeField::decode(data_); }
next(Label * L)328   void next(Label* L) const {
329     int n = NextField::decode(data_);
330     n > 0 ? L->link_to(n) : L->Unuse();
331   }
link_to(Label * L)332   void link_to(Label* L) { init(L, type()); }
333 
Displacement(int data)334   explicit Displacement(int data) { data_ = data; }
335 
Displacement(Label * L,Type type)336   Displacement(Label* L, Type type) { init(L, type); }
337 
print()338   void print() {
339     PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
340            NextField::decode(data_));
341   }
342 
343  private:
344   int data_;
345 
346   using TypeField = base::BitField<Type, 0, 2>;
347   using NextField = base::BitField<int, 2, 32 - 2>;
348 
349   void init(Label* L, Type type);
350 };
351 
352 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
353  private:
354   // We check before assembling an instruction that there is sufficient
355   // space to write an instruction and its relocation information.
356   // The relocation writer's position must be kGap bytes above the end of
357   // the generated instructions. This leaves enough space for the
358   // longest possible ia32 instruction, 15 bytes, and the longest possible
359   // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
360   // (There is a 15 byte limit on ia32 instruction length that rules out some
361   // otherwise valid instructions.)
362   // This allows for a single, fast space check per instruction.
363   static constexpr int kGap = 32;
364   STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);
365 
366  public:
367   // Create an assembler. Instructions and relocation information are emitted
368   // into a buffer, with the instructions starting from the beginning and the
369   // relocation information starting from the end of the buffer. See CodeDesc
370   // for a detailed comment on the layout (globals.h).
371   //
372   // If the provided buffer is nullptr, the assembler allocates and grows its
373   // own buffer. Otherwise it takes ownership of the provided buffer.
374   explicit Assembler(const AssemblerOptions&,
375                      std::unique_ptr<AssemblerBuffer> = {});
376 
377   // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
378   static constexpr int kNoHandlerTable = 0;
379   static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
380   void GetCode(Isolate* isolate, CodeDesc* desc,
381                SafepointTableBuilder* safepoint_table_builder,
382                int handler_table_offset);
383 
384   // Convenience wrapper for code without safepoint or handler tables.
GetCode(Isolate * isolate,CodeDesc * desc)385   void GetCode(Isolate* isolate, CodeDesc* desc) {
386     GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
387   }
388 
389   void FinalizeJumpOptimizationInfo();
390 
391   // Unused on this architecture.
MaybeEmitOutOfLineConstantPool()392   void MaybeEmitOutOfLineConstantPool() {}
393 
394   // Read/Modify the code target in the branch/call instruction at pc.
395   // Instruction cache flushing is controlled by icache_flush_mode.
396   inline static Address target_address_at(Address pc, Address constant_pool);
397   inline static void set_target_address_at(
398       Address pc, Address constant_pool, Address target,
399       ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
400 
401   // This sets the branch destination (which is in the instruction on x86).
402   // This is for calls and branches within generated code.
403   inline static void deserialization_set_special_target_at(
404       Address instruction_payload, Code code, Address target);
405 
406   // Get the size of the special target encoded at 'instruction_payload'.
407   inline static int deserialization_special_target_size(
408       Address instruction_payload);
409 
410   // This sets the internal reference at the pc.
411   inline static void deserialization_set_target_internal_reference_at(
412       Address pc, Address target,
413       RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
414 
415   static constexpr int kSpecialTargetSize = kSystemPointerSize;
416 
417   // One byte opcode for test al, 0xXX.
418   static constexpr byte kTestAlByte = 0xA8;
419   // One byte opcode for nop.
420   static constexpr byte kNopByte = 0x90;
421 
422   // One byte opcode for a short unconditional jump.
423   static constexpr byte kJmpShortOpcode = 0xEB;
424   // One byte prefix for a short conditional jump.
425   static constexpr byte kJccShortPrefix = 0x70;
426   static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
427   static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
428   static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
429   static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
430 
431   // ---------------------------------------------------------------------------
432   // Code generation
433   //
434   // - function names correspond one-to-one to ia32 instruction mnemonics
435   // - unless specified otherwise, instructions operate on 32bit operands
436   // - instructions on 8bit (byte) operands/registers have a trailing '_b'
437   // - instructions on 16bit (word) operands/registers have a trailing '_w'
438   // - naming conflicts with C++ keywords are resolved via a trailing '_'
439 
440   // NOTE ON INTERFACE: Currently, the interface is not very consistent
441   // in the sense that some operations (e.g. mov()) can be called in more
442   // than one way to generate the same instruction: The Register argument
443   // can in some cases be replaced with an Operand(Register) argument.
444   // This should be cleaned up and made more orthogonal. The question
445   // is: should we always use Operands instead of Registers where an
446   // Operand is possible, or should we have a Register (overloaded) form
447   // instead? We must be careful to make sure that the selected instruction
448   // is obvious from the parameters to avoid hard-to-find code generation
449   // bugs.
450 
451   // Insert the smallest number of nop instructions
452   // possible to align the pc offset to a multiple
453   // of m. m must be a power of 2.
454   void Align(int m);
455   // Insert the smallest number of zero bytes possible to align the pc offset
456   // to a multiple of m. m must be a power of 2 (>= 2).
457   void DataAlign(int m);
458   void Nop(int bytes = 1);
459   // Aligns code to something that's optimal for a jump target for the platform.
460   void CodeTargetAlign();
461 
462   // Stack
463   void pushad();
464   void popad();
465 
466   void pushfd();
467   void popfd();
468 
469   void push(const Immediate& x);
470   void push_imm32(int32_t imm32);
471   void push(Register src);
472   void push(Operand src);
473 
474   void pop(Register dst);
475   void pop(Operand dst);
476 
477   void leave();
478 
479   // Moves
mov_b(Register dst,Register src)480   void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
481   void mov_b(Register dst, Operand src);
mov_b(Register dst,int8_t imm8)482   void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
mov_b(Operand dst,int8_t src)483   void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); }
484   void mov_b(Operand dst, const Immediate& src);
485   void mov_b(Operand dst, Register src);
486 
487   void mov_w(Register dst, Operand src);
mov_w(Operand dst,int16_t src)488   void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); }
489   void mov_w(Operand dst, const Immediate& src);
490   void mov_w(Operand dst, Register src);
491 
492   void mov(Register dst, int32_t imm32);
493   void mov(Register dst, const Immediate& x);
494   void mov(Register dst, Handle<HeapObject> handle);
495   void mov(Register dst, Operand src);
496   void mov(Register dst, Register src);
497   void mov(Operand dst, const Immediate& x);
498   void mov(Operand dst, Handle<HeapObject> handle);
499   void mov(Operand dst, Register src);
500   void mov(Operand dst, Address src, RelocInfo::Mode);
501 
movsx_b(Register dst,Register src)502   void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
503   void movsx_b(Register dst, Operand src);
504 
movsx_w(Register dst,Register src)505   void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
506   void movsx_w(Register dst, Operand src);
507 
movzx_b(Register dst,Register src)508   void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
509   void movzx_b(Register dst, Operand src);
510 
movzx_w(Register dst,Register src)511   void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
512   void movzx_w(Register dst, Operand src);
513 
514   void movq(XMMRegister dst, Operand src);
515 
516   // Conditional moves
cmov(Condition cc,Register dst,Register src)517   void cmov(Condition cc, Register dst, Register src) {
518     cmov(cc, dst, Operand(src));
519   }
520   void cmov(Condition cc, Register dst, Operand src);
521 
522   // Flag management.
523   void cld();
524 
525   // Repetitive string instructions.
526   void rep_movs();
527   void rep_stos();
528   void stos();
529 
530   void xadd(Operand dst, Register src);
531   void xadd_b(Operand dst, Register src);
532   void xadd_w(Operand dst, Register src);
533 
534   // Exchange
535   void xchg(Register dst, Register src);
536   void xchg(Register dst, Operand src);
537   void xchg_b(Register reg, Operand op);
538   void xchg_w(Register reg, Operand op);
539 
540   // Lock prefix
541   void lock();
542 
543   // CompareExchange
544   void cmpxchg(Operand dst, Register src);
545   void cmpxchg_b(Operand dst, Register src);
546   void cmpxchg_w(Operand dst, Register src);
547   void cmpxchg8b(Operand dst);
548 
549   // Memory Fence
550   void mfence();
551   void lfence();
552 
553   void pause();
554 
555   // Arithmetics
556   void adc(Register dst, int32_t imm32);
adc(Register dst,Register src)557   void adc(Register dst, Register src) { adc(dst, Operand(src)); }
558   void adc(Register dst, Operand src);
559 
add(Register dst,Register src)560   void add(Register dst, Register src) { add(dst, Operand(src)); }
561   void add(Register dst, Operand src);
562   void add(Operand dst, Register src);
add(Register dst,const Immediate & imm)563   void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
564   void add(Operand dst, const Immediate& x);
565 
566   void and_(Register dst, int32_t imm32);
567   void and_(Register dst, const Immediate& x);
and_(Register dst,Register src)568   void and_(Register dst, Register src) { and_(dst, Operand(src)); }
569   void and_(Register dst, Operand src);
570   void and_(Operand dst, Register src);
571   void and_(Operand dst, const Immediate& x);
572 
cmpb(Register reg,Immediate imm8)573   void cmpb(Register reg, Immediate imm8) {
574     DCHECK(reg.is_byte_register());
575     cmpb(Operand(reg), imm8);
576   }
577   void cmpb(Operand op, Immediate imm8);
578   void cmpb(Register reg, Operand op);
579   void cmpb(Operand op, Register reg);
cmpb(Register dst,Register src)580   void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
581   void cmpb_al(Operand op);
582   void cmpw_ax(Operand op);
583   void cmpw(Operand dst, Immediate src);
cmpw(Register dst,Immediate src)584   void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
585   void cmpw(Register dst, Operand src);
cmpw(Register dst,Register src)586   void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
587   void cmpw(Operand dst, Register src);
588   void cmp(Register reg, int32_t imm32);
589   void cmp(Register reg, Handle<HeapObject> handle);
cmp(Register reg0,Register reg1)590   void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
591   void cmp(Register reg, Operand op);
cmp(Register reg,const Immediate & imm)592   void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
593   void cmp(Operand op, Register reg);
594   void cmp(Operand op, const Immediate& imm);
595   void cmp(Operand op, Handle<HeapObject> handle);
596 
597   void dec_b(Register dst);
598   void dec_b(Operand dst);
599 
600   void dec(Register dst);
601   void dec(Operand dst);
602 
603   void cdq();
604 
idiv(Register src)605   void idiv(Register src) { idiv(Operand(src)); }
606   void idiv(Operand src);
div(Register src)607   void div(Register src) { div(Operand(src)); }
608   void div(Operand src);
609 
610   // Signed multiply instructions.
611   void imul(Register src);  // edx:eax = eax * src.
imul(Register dst,Register src)612   void imul(Register dst, Register src) { imul(dst, Operand(src)); }
613   void imul(Register dst, Operand src);                  // dst = dst * src.
614   void imul(Register dst, Register src, int32_t imm32);  // dst = src * imm32.
615   void imul(Register dst, Operand src, int32_t imm32);
616 
617   void inc(Register dst);
618   void inc(Operand dst);
619 
620   void lea(Register dst, Operand src);
621 
622   // Unsigned multiply instruction.
623   void mul(Register src);  // edx:eax = eax * reg.
624 
625   void neg(Register dst);
626   void neg(Operand dst);
627 
628   void not_(Register dst);
629   void not_(Operand dst);
630 
631   void or_(Register dst, int32_t imm32);
or_(Register dst,Register src)632   void or_(Register dst, Register src) { or_(dst, Operand(src)); }
633   void or_(Register dst, Operand src);
634   void or_(Operand dst, Register src);
or_(Register dst,const Immediate & imm)635   void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
636   void or_(Operand dst, const Immediate& x);
637 
638   void rcl(Register dst, uint8_t imm8);
639   void rcr(Register dst, uint8_t imm8);
640 
rol(Register dst,uint8_t imm8)641   void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); }
642   void rol(Operand dst, uint8_t imm8);
rol_cl(Register dst)643   void rol_cl(Register dst) { rol_cl(Operand(dst)); }
644   void rol_cl(Operand dst);
645 
ror(Register dst,uint8_t imm8)646   void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
647   void ror(Operand dst, uint8_t imm8);
ror_cl(Register dst)648   void ror_cl(Register dst) { ror_cl(Operand(dst)); }
649   void ror_cl(Operand dst);
650 
sar(Register dst,uint8_t imm8)651   void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
652   void sar(Operand dst, uint8_t imm8);
sar_cl(Register dst)653   void sar_cl(Register dst) { sar_cl(Operand(dst)); }
654   void sar_cl(Operand dst);
655 
sbb(Register dst,Register src)656   void sbb(Register dst, Register src) { sbb(dst, Operand(src)); }
657   void sbb(Register dst, Operand src);
658 
shl(Register dst,uint8_t imm8)659   void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
660   void shl(Operand dst, uint8_t imm8);
shl_cl(Register dst)661   void shl_cl(Register dst) { shl_cl(Operand(dst)); }
662   void shl_cl(Operand dst);
663   void shld(Register dst, Register src, uint8_t shift);
664   void shld_cl(Register dst, Register src);
665 
shr(Register dst,uint8_t imm8)666   void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
667   void shr(Operand dst, uint8_t imm8);
shr_cl(Register dst)668   void shr_cl(Register dst) { shr_cl(Operand(dst)); }
669   void shr_cl(Operand dst);
670   void shrd(Register dst, Register src, uint8_t shift);
shrd_cl(Register dst,Register src)671   void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
672   void shrd_cl(Operand dst, Register src);
673 
sub(Register dst,const Immediate & imm)674   void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
675   void sub(Operand dst, const Immediate& x);
sub(Register dst,Register src)676   void sub(Register dst, Register src) { sub(dst, Operand(src)); }
677   void sub(Register dst, Operand src);
678   void sub(Operand dst, Register src);
679   void sub_sp_32(uint32_t imm);
680 
681   void test(Register reg, const Immediate& imm);
test(Register reg0,Register reg1)682   void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
683   void test(Register reg, Operand op);
684   void test(Operand op, const Immediate& imm);
test(Operand op,Register reg)685   void test(Operand op, Register reg) { test(reg, op); }
686   void test_b(Register reg, Operand op);
687   void test_b(Register reg, Immediate imm8);
688   void test_b(Operand op, Immediate imm8);
test_b(Operand op,Register reg)689   void test_b(Operand op, Register reg) { test_b(reg, op); }
test_b(Register dst,Register src)690   void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
691   void test_w(Register reg, Operand op);
692   void test_w(Register reg, Immediate imm16);
693   void test_w(Operand op, Immediate imm16);
test_w(Operand op,Register reg)694   void test_w(Operand op, Register reg) { test_w(reg, op); }
test_w(Register dst,Register src)695   void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }
696 
697   void xor_(Register dst, int32_t imm32);
xor_(Register dst,Register src)698   void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
699   void xor_(Register dst, Operand src);
700   void xor_(Operand dst, Register src);
xor_(Register dst,const Immediate & imm)701   void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
702   void xor_(Operand dst, const Immediate& x);
703 
704   // Bit operations.
705   void bswap(Register dst);
706   void bt(Operand dst, Register src);
bts(Register dst,Register src)707   void bts(Register dst, Register src) { bts(Operand(dst), src); }
708   void bts(Operand dst, Register src);
bsr(Register dst,Register src)709   void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
710   void bsr(Register dst, Operand src);
bsf(Register dst,Register src)711   void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
712   void bsf(Register dst, Operand src);
713 
714   // Miscellaneous
715   void hlt();
716   void int3();
717   void nop();
718   void ret(int imm16);
719   void ud2();
720 
721   // Label operations & relative jumps (PPUM Appendix D)
722   //
723   // Takes a branch opcode (cc) and a label (L) and generates
724   // either a backward branch or a forward branch and links it
725   // to the label fixup chain. Usage:
726   //
727   // Label L;    // unbound label
728   // j(cc, &L);  // forward branch to unbound label
729   // bind(&L);   // bind label to the current pc
730   // j(cc, &L);  // backward branch to bound label
731   // bind(&L);   // illegal: a label may be bound only once
732   //
733   // Note: The same Label can be used for forward and backward branches
734   // but it may be bound only once.
735 
736   void bind(Label* L);  // binds an unbound label L to the current code position
737 
738   // Calls
739   void call(Label* L);
740   void call(Address entry, RelocInfo::Mode rmode);
call(Register reg)741   void call(Register reg) { call(Operand(reg)); }
742   void call(Operand adr);
743   void call(Handle<Code> code, RelocInfo::Mode rmode);
744   void wasm_call(Address address, RelocInfo::Mode rmode);
745 
746   // Jumps
747   // unconditional jump to L
748   void jmp(Label* L, Label::Distance distance = Label::kFar);
749   void jmp(Address entry, RelocInfo::Mode rmode);
jmp(Register reg)750   void jmp(Register reg) { jmp(Operand(reg)); }
751   void jmp(Operand adr);
752   void jmp(Handle<Code> code, RelocInfo::Mode rmode);
753   // Unconditional jump relative to the current address. Low-level routine,
754   // use with caution!
755   void jmp_rel(int offset);
756 
757   // Conditional jumps
758   void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
759   void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
760   void j(Condition cc, Handle<Code> code,
761          RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
762 
763   // Floating-point operations
764   void fld(int i);
765   void fstp(int i);
766 
767   void fld1();
768   void fldz();
769   void fldpi();
770   void fldln2();
771 
772   void fld_s(Operand adr);
773   void fld_d(Operand adr);
774 
775   void fstp_s(Operand adr);
776   void fst_s(Operand adr);
777   void fstp_d(Operand adr);
778   void fst_d(Operand adr);
779 
780   void fild_s(Operand adr);
781   void fild_d(Operand adr);
782 
783   void fist_s(Operand adr);
784 
785   void fistp_s(Operand adr);
786   void fistp_d(Operand adr);
787 
788   // The fisttp instructions require SSE3.
789   void fisttp_s(Operand adr);
790   void fisttp_d(Operand adr);
791 
792   void fabs();
793   void fchs();
794   void fcos();
795   void fsin();
796   void fptan();
797   void fyl2x();
798   void f2xm1();
799   void fscale();
800   void fninit();
801 
802   void fadd(int i);
803   void fadd_i(int i);
804   void fsub(int i);
805   void fsub_i(int i);
806   void fmul(int i);
807   void fmul_i(int i);
808   void fdiv(int i);
809   void fdiv_i(int i);
810 
811   void fisub_s(Operand adr);
812 
813   void faddp(int i = 1);
814   void fsubp(int i = 1);
815   void fsubrp(int i = 1);
816   void fmulp(int i = 1);
817   void fdivp(int i = 1);
818   void fprem();
819   void fprem1();
820 
821   void fxch(int i = 1);
822   void fincstp();
823   void ffree(int i = 0);
824 
825   void ftst();
826   void fucomp(int i);
827   void fucompp();
828   void fucomi(int i);
829   void fucomip();
830   void fcompp();
831   void fnstsw_ax();
832   void fwait();
833   void fnclex();
834 
835   void frndint();
836 
837   void sahf();
838   void setcc(Condition cc, Register reg);
839 
840   void cpuid();
841 
842   // SSE instructions
addss(XMMRegister dst,XMMRegister src)843   void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); }
844   void addss(XMMRegister dst, Operand src);
subss(XMMRegister dst,XMMRegister src)845   void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
846   void subss(XMMRegister dst, Operand src);
mulss(XMMRegister dst,XMMRegister src)847   void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
848   void mulss(XMMRegister dst, Operand src);
divss(XMMRegister dst,XMMRegister src)849   void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
850   void divss(XMMRegister dst, Operand src);
sqrtss(XMMRegister dst,XMMRegister src)851   void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
852   void sqrtss(XMMRegister dst, Operand src);
853 
ucomiss(XMMRegister dst,XMMRegister src)854   void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
855   void ucomiss(XMMRegister dst, Operand src);
movaps(XMMRegister dst,XMMRegister src)856   void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
857   void movaps(XMMRegister dst, Operand src);
movups(XMMRegister dst,XMMRegister src)858   void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
859   void movups(XMMRegister dst, Operand src);
860   void movups(Operand dst, XMMRegister src);
861   void shufps(XMMRegister dst, XMMRegister src, byte imm8);
862   void shufpd(XMMRegister dst, XMMRegister src, byte imm8);
863 
maxss(XMMRegister dst,XMMRegister src)864   void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
865   void maxss(XMMRegister dst, Operand src);
minss(XMMRegister dst,XMMRegister src)866   void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
867   void minss(XMMRegister dst, Operand src);
868 
869   void rcpps(XMMRegister dst, Operand src);
rcpps(XMMRegister dst,XMMRegister src)870   void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); }
871   void sqrtps(XMMRegister dst, Operand src);
sqrtps(XMMRegister dst,XMMRegister src)872   void sqrtps(XMMRegister dst, XMMRegister src) { sqrtps(dst, Operand(src)); }
873   void rsqrtps(XMMRegister dst, Operand src);
rsqrtps(XMMRegister dst,XMMRegister src)874   void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
875   void haddps(XMMRegister dst, Operand src);
haddps(XMMRegister dst,XMMRegister src)876   void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
sqrtpd(XMMRegister dst,Operand src)877   void sqrtpd(XMMRegister dst, Operand src) {
878     sse2_instr(dst, src, 0x66, 0x0F, 0x51);
879   }
sqrtpd(XMMRegister dst,XMMRegister src)880   void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }
881 
882   void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
cmpps(XMMRegister dst,XMMRegister src,uint8_t cmp)883   void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
884     cmpps(dst, Operand(src), cmp);
885   }
886   void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
cmppd(XMMRegister dst,XMMRegister src,uint8_t cmp)887   void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
888     cmppd(dst, Operand(src), cmp);
889   }
890 
891 // Packed floating-point comparison operations.
892 #define PACKED_CMP_LIST(V) \
893   V(cmpeq, 0x0)            \
894   V(cmplt, 0x1)            \
895   V(cmple, 0x2)            \
896   V(cmpunord, 0x3)         \
897   V(cmpneq, 0x4)
898 
899 #define SSE_CMP_P(instr, imm8)                                            \
900   void instr##ps(XMMRegister dst, XMMRegister src) {                      \
901     cmpps(dst, Operand(src), imm8);                                       \
902   }                                                                       \
903   void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
904   void instr##pd(XMMRegister dst, XMMRegister src) {                      \
905     cmppd(dst, Operand(src), imm8);                                       \
906   }                                                                       \
907   void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }
908 
909   PACKED_CMP_LIST(SSE_CMP_P)
910 #undef SSE_CMP_P
911 
912   // SSE2 instructions
913   void cvttss2si(Register dst, Operand src);
cvttss2si(Register dst,XMMRegister src)914   void cvttss2si(Register dst, XMMRegister src) {
915     cvttss2si(dst, Operand(src));
916   }
917   void cvttsd2si(Register dst, Operand src);
cvttsd2si(Register dst,XMMRegister src)918   void cvttsd2si(Register dst, XMMRegister src) {
919     cvttsd2si(dst, Operand(src));
920   }
921   void cvtsd2si(Register dst, XMMRegister src);
922 
cvtsi2ss(XMMRegister dst,Register src)923   void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
924   void cvtsi2ss(XMMRegister dst, Operand src);
cvtsi2sd(XMMRegister dst,Register src)925   void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
926   void cvtsi2sd(XMMRegister dst, Operand src);
927   void cvtss2sd(XMMRegister dst, Operand src);
cvtss2sd(XMMRegister dst,XMMRegister src)928   void cvtss2sd(XMMRegister dst, XMMRegister src) {
929     cvtss2sd(dst, Operand(src));
930   }
931   void cvtsd2ss(XMMRegister dst, Operand src);
cvtsd2ss(XMMRegister dst,XMMRegister src)932   void cvtsd2ss(XMMRegister dst, XMMRegister src) {
933     cvtsd2ss(dst, Operand(src));
934   }
cvtdq2ps(XMMRegister dst,XMMRegister src)935   void cvtdq2ps(XMMRegister dst, XMMRegister src) {
936     cvtdq2ps(dst, Operand(src));
937   }
938   void cvtdq2ps(XMMRegister dst, Operand src);
cvttps2dq(XMMRegister dst,XMMRegister src)939   void cvttps2dq(XMMRegister dst, XMMRegister src) {
940     cvttps2dq(dst, Operand(src));
941   }
942   void cvttps2dq(XMMRegister dst, Operand src);
943 
addsd(XMMRegister dst,XMMRegister src)944   void addsd(XMMRegister dst, XMMRegister src) { addsd(dst, Operand(src)); }
945   void addsd(XMMRegister dst, Operand src);
subsd(XMMRegister dst,XMMRegister src)946   void subsd(XMMRegister dst, XMMRegister src) { subsd(dst, Operand(src)); }
947   void subsd(XMMRegister dst, Operand src);
mulsd(XMMRegister dst,XMMRegister src)948   void mulsd(XMMRegister dst, XMMRegister src) { mulsd(dst, Operand(src)); }
949   void mulsd(XMMRegister dst, Operand src);
divsd(XMMRegister dst,XMMRegister src)950   void divsd(XMMRegister dst, XMMRegister src) { divsd(dst, Operand(src)); }
951   void divsd(XMMRegister dst, Operand src);
sqrtsd(XMMRegister dst,XMMRegister src)952   void sqrtsd(XMMRegister dst, XMMRegister src) { sqrtsd(dst, Operand(src)); }
953   void sqrtsd(XMMRegister dst, Operand src);
954 
ucomisd(XMMRegister dst,XMMRegister src)955   void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
956   void ucomisd(XMMRegister dst, Operand src);
957 
958   void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
959   void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
960 
movapd(XMMRegister dst,XMMRegister src)961   void movapd(XMMRegister dst, XMMRegister src) { movapd(dst, Operand(src)); }
movapd(XMMRegister dst,Operand src)962   void movapd(XMMRegister dst, Operand src) {
963     sse2_instr(dst, src, 0x66, 0x0F, 0x28);
964   }
movupd(XMMRegister dst,Operand src)965   void movupd(XMMRegister dst, Operand src) {
966     sse2_instr(dst, src, 0x66, 0x0F, 0x10);
967   }
968 
969   void movmskpd(Register dst, XMMRegister src);
970   void movmskps(Register dst, XMMRegister src);
971 
972   void pmovmskb(Register dst, XMMRegister src);
973 
974   void cmpltsd(XMMRegister dst, XMMRegister src);
975 
maxsd(XMMRegister dst,XMMRegister src)976   void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); }
977   void maxsd(XMMRegister dst, Operand src);
minsd(XMMRegister dst,XMMRegister src)978   void minsd(XMMRegister dst, XMMRegister src) { minsd(dst, Operand(src)); }
979   void minsd(XMMRegister dst, Operand src);
980 
981   void movdqa(XMMRegister dst, Operand src);
982   void movdqa(Operand dst, XMMRegister src);
983   void movdqu(XMMRegister dst, Operand src);
984   void movdqu(Operand dst, XMMRegister src);
movdq(bool aligned,XMMRegister dst,Operand src)985   void movdq(bool aligned, XMMRegister dst, Operand src) {
986     if (aligned) {
987       movdqa(dst, src);
988     } else {
989       movdqu(dst, src);
990     }
991   }
992 
movd(XMMRegister dst,Register src)993   void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
994   void movd(XMMRegister dst, Operand src);
movd(Register dst,XMMRegister src)995   void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
996   void movd(Operand dst, XMMRegister src);
movsd(XMMRegister dst,XMMRegister src)997   void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
998   void movsd(XMMRegister dst, Operand src);
999   void movsd(Operand dst, XMMRegister src);
1000 
1001   void movss(XMMRegister dst, Operand src);
1002   void movss(Operand dst, XMMRegister src);
movss(XMMRegister dst,XMMRegister src)1003   void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
1004   void extractps(Register dst, XMMRegister src, byte imm8);
1005 
1006   void psllw(XMMRegister reg, uint8_t shift);
1007   void pslld(XMMRegister reg, uint8_t shift);
1008   void psrlw(XMMRegister reg, uint8_t shift);
1009   void psrld(XMMRegister reg, uint8_t shift);
1010   void psraw(XMMRegister reg, uint8_t shift);
1011   void psrad(XMMRegister reg, uint8_t shift);
1012   void psllq(XMMRegister reg, uint8_t shift);
1013   void psrlq(XMMRegister reg, uint8_t shift);
1014 
pshufhw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1015   void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1016     pshufhw(dst, Operand(src), shuffle);
1017   }
1018   void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
pshuflw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1019   void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1020     pshuflw(dst, Operand(src), shuffle);
1021   }
1022   void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
pshufd(XMMRegister dst,XMMRegister src,uint8_t shuffle)1023   void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1024     pshufd(dst, Operand(src), shuffle);
1025   }
1026   void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1027 
pblendw(XMMRegister dst,XMMRegister src,uint8_t mask)1028   void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
1029     pblendw(dst, Operand(src), mask);
1030   }
1031   void pblendw(XMMRegister dst, Operand src, uint8_t mask);
1032 
palignr(XMMRegister dst,XMMRegister src,uint8_t mask)1033   void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
1034     palignr(dst, Operand(src), mask);
1035   }
1036   void palignr(XMMRegister dst, Operand src, uint8_t mask);
1037 
pextrb(Register dst,XMMRegister src,uint8_t offset)1038   void pextrb(Register dst, XMMRegister src, uint8_t offset) {
1039     pextrb(Operand(dst), src, offset);
1040   }
1041   void pextrb(Operand dst, XMMRegister src, uint8_t offset);
1042   // SSE3 instructions
1043   void movddup(XMMRegister dst, Operand src);
movddup(XMMRegister dst,XMMRegister src)1044   void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
1045 
1046   // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
pextrw(Register dst,XMMRegister src,uint8_t offset)1047   void pextrw(Register dst, XMMRegister src, uint8_t offset) {
1048     pextrw(Operand(dst), src, offset);
1049   }
1050   void pextrw(Operand dst, XMMRegister src, uint8_t offset);
pextrd(Register dst,XMMRegister src,uint8_t offset)1051   void pextrd(Register dst, XMMRegister src, uint8_t offset) {
1052     pextrd(Operand(dst), src, offset);
1053   }
1054   void pextrd(Operand dst, XMMRegister src, uint8_t offset);
1055 
insertps(XMMRegister dst,XMMRegister src,uint8_t offset)1056   void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
1057     insertps(dst, Operand(src), offset);
1058   }
1059   void insertps(XMMRegister dst, Operand src, uint8_t offset);
pinsrb(XMMRegister dst,Register src,uint8_t offset)1060   void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
1061     pinsrb(dst, Operand(src), offset);
1062   }
1063   void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
pinsrw(XMMRegister dst,Register src,uint8_t offset)1064   void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
1065     pinsrw(dst, Operand(src), offset);
1066   }
1067   void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
pinsrd(XMMRegister dst,Register src,uint8_t offset)1068   void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
1069     pinsrd(dst, Operand(src), offset);
1070   }
1071   void pinsrd(XMMRegister dst, Operand src, uint8_t offset);
1072 
1073   void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
1074   void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1075 
1076   // AVX instructions
vfmadd132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1077   void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1078     vfmadd132sd(dst, src1, Operand(src2));
1079   }
vfmadd213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1080   void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1081     vfmadd213sd(dst, src1, Operand(src2));
1082   }
vfmadd231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1083   void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1084     vfmadd231sd(dst, src1, Operand(src2));
1085   }
vfmadd132sd(XMMRegister dst,XMMRegister src1,Operand src2)1086   void vfmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1087     vfmasd(0x99, dst, src1, src2);
1088   }
vfmadd213sd(XMMRegister dst,XMMRegister src1,Operand src2)1089   void vfmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1090     vfmasd(0xa9, dst, src1, src2);
1091   }
vfmadd231sd(XMMRegister dst,XMMRegister src1,Operand src2)1092   void vfmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1093     vfmasd(0xb9, dst, src1, src2);
1094   }
vfmsub132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1095   void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1096     vfmsub132sd(dst, src1, Operand(src2));
1097   }
vfmsub213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1098   void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1099     vfmsub213sd(dst, src1, Operand(src2));
1100   }
vfmsub231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1101   void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1102     vfmsub231sd(dst, src1, Operand(src2));
1103   }
vfmsub132sd(XMMRegister dst,XMMRegister src1,Operand src2)1104   void vfmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1105     vfmasd(0x9b, dst, src1, src2);
1106   }
vfmsub213sd(XMMRegister dst,XMMRegister src1,Operand src2)1107   void vfmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1108     vfmasd(0xab, dst, src1, src2);
1109   }
vfmsub231sd(XMMRegister dst,XMMRegister src1,Operand src2)1110   void vfmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1111     vfmasd(0xbb, dst, src1, src2);
1112   }
vfnmadd132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1113   void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1114     vfnmadd132sd(dst, src1, Operand(src2));
1115   }
vfnmadd213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1116   void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1117     vfnmadd213sd(dst, src1, Operand(src2));
1118   }
vfnmadd231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1119   void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1120     vfnmadd231sd(dst, src1, Operand(src2));
1121   }
vfnmadd132sd(XMMRegister dst,XMMRegister src1,Operand src2)1122   void vfnmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1123     vfmasd(0x9d, dst, src1, src2);
1124   }
vfnmadd213sd(XMMRegister dst,XMMRegister src1,Operand src2)1125   void vfnmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1126     vfmasd(0xad, dst, src1, src2);
1127   }
vfnmadd231sd(XMMRegister dst,XMMRegister src1,Operand src2)1128   void vfnmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1129     vfmasd(0xbd, dst, src1, src2);
1130   }
vfnmsub132sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1131   void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1132     vfnmsub132sd(dst, src1, Operand(src2));
1133   }
vfnmsub213sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1134   void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1135     vfnmsub213sd(dst, src1, Operand(src2));
1136   }
vfnmsub231sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1137   void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1138     vfnmsub231sd(dst, src1, Operand(src2));
1139   }
vfnmsub132sd(XMMRegister dst,XMMRegister src1,Operand src2)1140   void vfnmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1141     vfmasd(0x9f, dst, src1, src2);
1142   }
vfnmsub213sd(XMMRegister dst,XMMRegister src1,Operand src2)1143   void vfnmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1144     vfmasd(0xaf, dst, src1, src2);
1145   }
vfnmsub231sd(XMMRegister dst,XMMRegister src1,Operand src2)1146   void vfnmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1147     vfmasd(0xbf, dst, src1, src2);
1148   }
1149   void vfmasd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1150 
vfmadd132ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1151   void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1152     vfmadd132ss(dst, src1, Operand(src2));
1153   }
vfmadd213ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1154   void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1155     vfmadd213ss(dst, src1, Operand(src2));
1156   }
vfmadd231ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1157   void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1158     vfmadd231ss(dst, src1, Operand(src2));
1159   }
vfmadd132ss(XMMRegister dst,XMMRegister src1,Operand src2)1160   void vfmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1161     vfmass(0x99, dst, src1, src2);
1162   }
vfmadd213ss(XMMRegister dst,XMMRegister src1,Operand src2)1163   void vfmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1164     vfmass(0xa9, dst, src1, src2);
1165   }
vfmadd231ss(XMMRegister dst,XMMRegister src1,Operand src2)1166   void vfmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1167     vfmass(0xb9, dst, src1, src2);
1168   }
vfmsub132ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1169   void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1170     vfmsub132ss(dst, src1, Operand(src2));
1171   }
vfmsub213ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1172   void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1173     vfmsub213ss(dst, src1, Operand(src2));
1174   }
vfmsub231ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1175   void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1176     vfmsub231ss(dst, src1, Operand(src2));
1177   }
vfmsub132ss(XMMRegister dst,XMMRegister src1,Operand src2)1178   void vfmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1179     vfmass(0x9b, dst, src1, src2);
1180   }
vfmsub213ss(XMMRegister dst,XMMRegister src1,Operand src2)1181   void vfmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1182     vfmass(0xab, dst, src1, src2);
1183   }
vfmsub231ss(XMMRegister dst,XMMRegister src1,Operand src2)1184   void vfmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1185     vfmass(0xbb, dst, src1, src2);
1186   }
vfnmadd132ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1187   void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1188     vfnmadd132ss(dst, src1, Operand(src2));
1189   }
vfnmadd213ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1190   void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1191     vfnmadd213ss(dst, src1, Operand(src2));
1192   }
vfnmadd231ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1193   void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1194     vfnmadd231ss(dst, src1, Operand(src2));
1195   }
vfnmadd132ss(XMMRegister dst,XMMRegister src1,Operand src2)1196   void vfnmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1197     vfmass(0x9d, dst, src1, src2);
1198   }
vfnmadd213ss(XMMRegister dst,XMMRegister src1,Operand src2)1199   void vfnmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1200     vfmass(0xad, dst, src1, src2);
1201   }
vfnmadd231ss(XMMRegister dst,XMMRegister src1,Operand src2)1202   void vfnmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1203     vfmass(0xbd, dst, src1, src2);
1204   }
vfnmsub132ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1205   void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1206     vfnmsub132ss(dst, src1, Operand(src2));
1207   }
vfnmsub213ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1208   void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1209     vfnmsub213ss(dst, src1, Operand(src2));
1210   }
vfnmsub231ss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1211   void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1212     vfnmsub231ss(dst, src1, Operand(src2));
1213   }
vfnmsub132ss(XMMRegister dst,XMMRegister src1,Operand src2)1214   void vfnmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1215     vfmass(0x9f, dst, src1, src2);
1216   }
vfnmsub213ss(XMMRegister dst,XMMRegister src1,Operand src2)1217   void vfnmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1218     vfmass(0xaf, dst, src1, src2);
1219   }
vfnmsub231ss(XMMRegister dst,XMMRegister src1,Operand src2)1220   void vfnmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1221     vfmass(0xbf, dst, src1, src2);
1222   }
1223   void vfmass(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1224 
vaddsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1225   void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1226     vaddsd(dst, src1, Operand(src2));
1227   }
vaddsd(XMMRegister dst,XMMRegister src1,Operand src2)1228   void vaddsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1229     vsd(0x58, dst, src1, src2);
1230   }
vsubsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1231   void vsubsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1232     vsubsd(dst, src1, Operand(src2));
1233   }
vsubsd(XMMRegister dst,XMMRegister src1,Operand src2)1234   void vsubsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1235     vsd(0x5c, dst, src1, src2);
1236   }
vmulsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1237   void vmulsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1238     vmulsd(dst, src1, Operand(src2));
1239   }
vmulsd(XMMRegister dst,XMMRegister src1,Operand src2)1240   void vmulsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1241     vsd(0x59, dst, src1, src2);
1242   }
vdivsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1243   void vdivsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1244     vdivsd(dst, src1, Operand(src2));
1245   }
vdivsd(XMMRegister dst,XMMRegister src1,Operand src2)1246   void vdivsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1247     vsd(0x5e, dst, src1, src2);
1248   }
vmaxsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1249   void vmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1250     vmaxsd(dst, src1, Operand(src2));
1251   }
vmaxsd(XMMRegister dst,XMMRegister src1,Operand src2)1252   void vmaxsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1253     vsd(0x5f, dst, src1, src2);
1254   }
vminsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1255   void vminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1256     vminsd(dst, src1, Operand(src2));
1257   }
vminsd(XMMRegister dst,XMMRegister src1,Operand src2)1258   void vminsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1259     vsd(0x5d, dst, src1, src2);
1260   }
vsqrtsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1261   void vsqrtsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1262     vsqrtsd(dst, src1, Operand(src2));
1263   }
vsqrtsd(XMMRegister dst,XMMRegister src1,Operand src2)1264   void vsqrtsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1265     vsd(0x51, dst, src1, src2);
1266   }
1267   void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1268 
vaddss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1269   void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1270     vaddss(dst, src1, Operand(src2));
1271   }
vaddss(XMMRegister dst,XMMRegister src1,Operand src2)1272   void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
1273     vss(0x58, dst, src1, src2);
1274   }
vsubss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1275   void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1276     vsubss(dst, src1, Operand(src2));
1277   }
vsubss(XMMRegister dst,XMMRegister src1,Operand src2)1278   void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
1279     vss(0x5c, dst, src1, src2);
1280   }
vmulss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1281   void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1282     vmulss(dst, src1, Operand(src2));
1283   }
vmulss(XMMRegister dst,XMMRegister src1,Operand src2)1284   void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
1285     vss(0x59, dst, src1, src2);
1286   }
vdivss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1287   void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1288     vdivss(dst, src1, Operand(src2));
1289   }
vdivss(XMMRegister dst,XMMRegister src1,Operand src2)1290   void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
1291     vss(0x5e, dst, src1, src2);
1292   }
vmaxss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1293   void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1294     vmaxss(dst, src1, Operand(src2));
1295   }
vmaxss(XMMRegister dst,XMMRegister src1,Operand src2)1296   void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
1297     vss(0x5f, dst, src1, src2);
1298   }
vminss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1299   void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1300     vminss(dst, src1, Operand(src2));
1301   }
vminss(XMMRegister dst,XMMRegister src1,Operand src2)1302   void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
1303     vss(0x5d, dst, src1, src2);
1304   }
vsqrtss(XMMRegister dst,XMMRegister src1,XMMRegister src2)1305   void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1306     vsqrtss(dst, src1, Operand(src2));
1307   }
vsqrtss(XMMRegister dst,XMMRegister src1,Operand src2)1308   void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
1309     vss(0x51, dst, src1, src2);
1310   }
1311   void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1312 
vrcpps(XMMRegister dst,XMMRegister src)1313   void vrcpps(XMMRegister dst, XMMRegister src) { vrcpps(dst, Operand(src)); }
vrcpps(XMMRegister dst,Operand src)1314   void vrcpps(XMMRegister dst, Operand src) {
1315     vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG);
1316   }
vsqrtps(XMMRegister dst,XMMRegister src)1317   void vsqrtps(XMMRegister dst, XMMRegister src) { vsqrtps(dst, Operand(src)); }
vsqrtps(XMMRegister dst,Operand src)1318   void vsqrtps(XMMRegister dst, Operand src) {
1319     vinstr(0x51, dst, xmm0, src, kNone, k0F, kWIG);
1320   }
vrsqrtps(XMMRegister dst,XMMRegister src)1321   void vrsqrtps(XMMRegister dst, XMMRegister src) {
1322     vrsqrtps(dst, Operand(src));
1323   }
vrsqrtps(XMMRegister dst,Operand src)1324   void vrsqrtps(XMMRegister dst, Operand src) {
1325     vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
1326   }
vhaddps(XMMRegister dst,XMMRegister src1,XMMRegister src2)1327   void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1328     vhaddps(dst, src1, Operand(src2));
1329   }
vhaddps(XMMRegister dst,XMMRegister src1,Operand src2)1330   void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
1331     vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
1332   }
vsqrtpd(XMMRegister dst,XMMRegister src)1333   void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
vsqrtpd(XMMRegister dst,Operand src)1334   void vsqrtpd(XMMRegister dst, Operand src) {
1335     vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
1336   }
vmovaps(XMMRegister dst,XMMRegister src)1337   void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
vmovaps(XMMRegister dst,Operand src)1338   void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
vmovapd(XMMRegister dst,XMMRegister src)1339   void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
vmovapd(XMMRegister dst,Operand src)1340   void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
vmovups(XMMRegister dst,XMMRegister src)1341   void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
vmovups(XMMRegister dst,Operand src)1342   void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
vmovupd(XMMRegister dst,Operand src)1343   void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
vshufps(XMMRegister dst,XMMRegister src1,XMMRegister src2,byte imm8)1344   void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
1345     vshufps(dst, src1, Operand(src2), imm8);
1346   }
1347   void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
vshufpd(XMMRegister dst,XMMRegister src1,XMMRegister src2,byte imm8)1348   void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
1349     vshufpd(dst, src1, Operand(src2), imm8);
1350   }
1351   void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
1352 
// AVX packed-integer shifts taking an 8-bit immediate; declarations only,
// the definitions live outside this header chunk.
// NOTE(review): going by the mnemonics, src is shifted by imm8 and the result
// is written to dst (sll/srl = logical left/right, sra = arithmetic right;
// w/d/q = element width) -- confirm against the Intel SDM.
1353   void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1354   void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1355   void vpsllq(XMMRegister dst, XMMRegister src, uint8_t imm8);
1356   void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1357   void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1358   void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1359   void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
1360   void vpsrlq(XMMRegister dst, XMMRegister src, uint8_t imm8);
1361 
vpshufhw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1362   void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1363     vpshufhw(dst, Operand(src), shuffle);
1364   }
1365   void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
vpshuflw(XMMRegister dst,XMMRegister src,uint8_t shuffle)1366   void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1367     vpshuflw(dst, Operand(src), shuffle);
1368   }
1369   void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
vpshufd(XMMRegister dst,XMMRegister src,uint8_t shuffle)1370   void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1371     vpshufd(dst, Operand(src), shuffle);
1372   }
1373   void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1374 
vpblendw(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t mask)1375   void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1376                 uint8_t mask) {
1377     vpblendw(dst, src1, Operand(src2), mask);
1378   }
1379   void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1380 
vpalignr(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t mask)1381   void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1382                 uint8_t mask) {
1383     vpalignr(dst, src1, Operand(src2), mask);
1384   }
1385   void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1386 
vpextrb(Register dst,XMMRegister src,uint8_t offset)1387   void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
1388     vpextrb(Operand(dst), src, offset);
1389   }
1390   void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
vpextrw(Register dst,XMMRegister src,uint8_t offset)1391   void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
1392     vpextrw(Operand(dst), src, offset);
1393   }
1394   void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
vpextrd(Register dst,XMMRegister src,uint8_t offset)1395   void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
1396     vpextrd(Operand(dst), src, offset);
1397   }
1398   void vpextrd(Operand dst, XMMRegister src, uint8_t offset);
1399 
vinsertps(XMMRegister dst,XMMRegister src1,XMMRegister src2,uint8_t offset)1400   void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1401                  uint8_t offset) {
1402     vinsertps(dst, src1, Operand(src2), offset);
1403   }
1404   void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
1405                  uint8_t offset);
vpinsrb(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1406   void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
1407                uint8_t offset) {
1408     vpinsrb(dst, src1, Operand(src2), offset);
1409   }
1410   void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
vpinsrw(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1411   void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
1412                uint8_t offset) {
1413     vpinsrw(dst, src1, Operand(src2), offset);
1414   }
1415   void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
vpinsrd(XMMRegister dst,XMMRegister src1,Register src2,uint8_t offset)1416   void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
1417                uint8_t offset) {
1418     vpinsrd(dst, src1, Operand(src2), offset);
1419   }
1420   void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
1421 
// AVX packed rounding of single- (ps) and double-precision (pd) values using
// the given RoundingMode. Declarations only; the definitions and the
// RoundingMode type are outside this chunk.
1422   void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
1423   void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1424 
vcvtdq2ps(XMMRegister dst,XMMRegister src)1425   void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
1426     vcvtdq2ps(dst, Operand(src));
1427   }
vcvtdq2ps(XMMRegister dst,Operand src)1428   void vcvtdq2ps(XMMRegister dst, Operand src) {
1429     vinstr(0x5B, dst, xmm0, src, kNone, k0F, kWIG);
1430   }
vcvttps2dq(XMMRegister dst,XMMRegister src)1431   void vcvttps2dq(XMMRegister dst, XMMRegister src) {
1432     vcvttps2dq(dst, Operand(src));
1433   }
vcvttps2dq(XMMRegister dst,Operand src)1434   void vcvttps2dq(XMMRegister dst, Operand src) {
1435     vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
1436   }
1437 
vmovddup(XMMRegister dst,Operand src)1438   void vmovddup(XMMRegister dst, Operand src) {
1439     vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
1440   }
vmovddup(XMMRegister dst,XMMRegister src)1441   void vmovddup(XMMRegister dst, XMMRegister src) {
1442     vmovddup(dst, Operand(src));
1443   }
vbroadcastss(XMMRegister dst,Operand src)1444   void vbroadcastss(XMMRegister dst, Operand src) {
1445     vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
1446   }
vmovdqu(XMMRegister dst,Operand src)1447   void vmovdqu(XMMRegister dst, Operand src) {
1448     vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
1449   }
vmovdqu(Operand dst,XMMRegister src)1450   void vmovdqu(Operand dst, XMMRegister src) {
1451     vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
1452   }
vmovd(XMMRegister dst,Register src)1453   void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
vmovd(XMMRegister dst,Operand src)1454   void vmovd(XMMRegister dst, Operand src) {
1455     vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
1456   }
vmovd(Register dst,XMMRegister src)1457   void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
vmovd(Operand dst,XMMRegister src)1458   void vmovd(Operand dst, XMMRegister src) {
1459     vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
1460   }
1461 
// AVX mask-extraction instructions writing a general-purpose register from an
// XMM source. Declarations only; defined outside this chunk.
1462   void vmovmskps(Register dst, XMMRegister src);
1463 
1464   void vpmovmskb(Register dst, XMMRegister src);
1465 
1466   // BMI instruction
andn(Register dst,Register src1,Register src2)1467   void andn(Register dst, Register src1, Register src2) {
1468     andn(dst, src1, Operand(src2));
1469   }
andn(Register dst,Register src1,Operand src2)1470   void andn(Register dst, Register src1, Operand src2) {
1471     bmi1(0xf2, dst, src1, src2);
1472   }
bextr(Register dst,Register src1,Register src2)1473   void bextr(Register dst, Register src1, Register src2) {
1474     bextr(dst, Operand(src1), src2);
1475   }
bextr(Register dst,Operand src1,Register src2)1476   void bextr(Register dst, Operand src1, Register src2) {
1477     bmi1(0xf7, dst, src2, src1);
1478   }
blsi(Register dst,Register src)1479   void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
blsi(Register dst,Operand src)1480   void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
blsmsk(Register dst,Register src)1481   void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
blsmsk(Register dst,Operand src)1482   void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
blsr(Register dst,Register src)1483   void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
blsr(Register dst,Operand src)1484   void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
tzcnt(Register dst,Register src)1485   void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
1486   void tzcnt(Register dst, Operand src);
1487 
lzcnt(Register dst,Register src)1488   void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
1489   void lzcnt(Register dst, Operand src);
1490 
popcnt(Register dst,Register src)1491   void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
1492   void popcnt(Register dst, Operand src);
1493 
bzhi(Register dst,Register src1,Register src2)1494   void bzhi(Register dst, Register src1, Register src2) {
1495     bzhi(dst, Operand(src1), src2);
1496   }
bzhi(Register dst,Operand src1,Register src2)1497   void bzhi(Register dst, Operand src1, Register src2) {
1498     bmi2(kNone, 0xf5, dst, src2, src1);
1499   }
mulx(Register dst1,Register dst2,Register src)1500   void mulx(Register dst1, Register dst2, Register src) {
1501     mulx(dst1, dst2, Operand(src));
1502   }
mulx(Register dst1,Register dst2,Operand src)1503   void mulx(Register dst1, Register dst2, Operand src) {
1504     bmi2(kF2, 0xf6, dst1, dst2, src);
1505   }
pdep(Register dst,Register src1,Register src2)1506   void pdep(Register dst, Register src1, Register src2) {
1507     pdep(dst, src1, Operand(src2));
1508   }
pdep(Register dst,Register src1,Operand src2)1509   void pdep(Register dst, Register src1, Operand src2) {
1510     bmi2(kF2, 0xf5, dst, src1, src2);
1511   }
pext(Register dst,Register src1,Register src2)1512   void pext(Register dst, Register src1, Register src2) {
1513     pext(dst, src1, Operand(src2));
1514   }
pext(Register dst,Register src1,Operand src2)1515   void pext(Register dst, Register src1, Operand src2) {
1516     bmi2(kF3, 0xf5, dst, src1, src2);
1517   }
sarx(Register dst,Register src1,Register src2)1518   void sarx(Register dst, Register src1, Register src2) {
1519     sarx(dst, Operand(src1), src2);
1520   }
sarx(Register dst,Operand src1,Register src2)1521   void sarx(Register dst, Operand src1, Register src2) {
1522     bmi2(kF3, 0xf7, dst, src2, src1);
1523   }
shlx(Register dst,Register src1,Register src2)1524   void shlx(Register dst, Register src1, Register src2) {
1525     shlx(dst, Operand(src1), src2);
1526   }
shlx(Register dst,Operand src1,Register src2)1527   void shlx(Register dst, Operand src1, Register src2) {
1528     bmi2(k66, 0xf7, dst, src2, src1);
1529   }
shrx(Register dst,Register src1,Register src2)1530   void shrx(Register dst, Register src1, Register src2) {
1531     shrx(dst, Operand(src1), src2);
1532   }
shrx(Register dst,Operand src1,Register src2)1533   void shrx(Register dst, Operand src1, Register src2) {
1534     bmi2(kF2, 0xf7, dst, src2, src1);
1535   }
rorx(Register dst,Register src,byte imm8)1536   void rorx(Register dst, Register src, byte imm8) {
1537     rorx(dst, Operand(src), imm8);
1538   }
1539   void rorx(Register dst, Operand src, byte imm8);
1540 
1541   // Implementation of packed single-precision floating-point SSE instructions.
// Called with (opcode, dst, src) by the name##ps methods that
// SSE_PACKED_OP_DECLARE generates.
1542   void ps(byte op, XMMRegister dst, Operand src);
1543   // Implementation of packed double-precision floating-point SSE instructions.
// Called with (opcode, dst, src) by the name##pd methods that
// SSE_PACKED_OP_DECLARE generates.
1544   void pd(byte op, XMMRegister dst, Operand src);
1545 
// X-macro list of packed floating-point operations. Each entry is
// V(mnemonic stem, opcode byte); SSE_PACKED_OP_DECLARE and
// AVX_PACKED_OP_DECLARE expand it into <stem>ps/<stem>pd and
// v<stem>ps/v<stem>pd methods respectively.
1546 #define PACKED_OP_LIST(V) \
1547   V(and, 0x54)            \
1548   V(andn, 0x55)           \
1549   V(or, 0x56)             \
1550   V(xor, 0x57)            \
1551   V(add, 0x58)            \
1552   V(mul, 0x59)            \
1553   V(sub, 0x5c)            \
1554   V(min, 0x5d)            \
1555   V(div, 0x5e)            \
1556   V(max, 0x5f)
1557 
// For each PACKED_OP_LIST entry, defines four SSE methods: name##ps and
// name##pd, each with an XMMRegister-source and an Operand-source overload.
// All of them pass the entry's opcode to ps() or pd(); the register overloads
// wrap src in an Operand first. The macro is undefined right after use.
1558 #define SSE_PACKED_OP_DECLARE(name, opcode)                             \
1559   void name##ps(XMMRegister dst, XMMRegister src) {                     \
1560     ps(opcode, dst, Operand(src));                                      \
1561   }                                                                     \
1562   void name##ps(XMMRegister dst, Operand src) { ps(opcode, dst, src); } \
1563   void name##pd(XMMRegister dst, XMMRegister src) {                     \
1564     pd(opcode, dst, Operand(src));                                      \
1565   }                                                                     \
1566   void name##pd(XMMRegister dst, Operand src) { pd(opcode, dst, src); }
1567 
1568   PACKED_OP_LIST(SSE_PACKED_OP_DECLARE)
1569 #undef SSE_PACKED_OP_DECLARE
1570 
// For each PACKED_OP_LIST entry, defines the three-operand AVX methods
// v##name##ps and v##name##pd, each with an XMMRegister and an Operand
// overload for src2. They pass the entry's opcode to vps() or vpd(). Both
// this macro and PACKED_OP_LIST are undefined once the expansion is done.
1571 #define AVX_PACKED_OP_DECLARE(name, opcode)                               \
1572   void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1573     vps(opcode, dst, src1, Operand(src2));                                \
1574   }                                                                       \
1575   void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1576     vps(opcode, dst, src1, src2);                                         \
1577   }                                                                       \
1578   void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1579     vpd(opcode, dst, src1, Operand(src2));                                \
1580   }                                                                       \
1581   void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1582     vpd(opcode, dst, src1, src2);                                         \
1583   }
1584 
1585   PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
1586 #undef AVX_PACKED_OP_DECLARE
1587 #undef PACKED_OP_LIST
1588 
// AVX counterparts of ps()/pd(): take an opcode byte plus dst, src1 and an
// Operand src2. Declarations only; defined outside this chunk.
1589   void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1590   void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1591 
// AVX packed compares; cmp is the 8-bit comparison selector that the
// AVX_CMP_P-generated wrappers pass as their imm8 argument.
1592   void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
1593   void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
1594 
// For each PACKED_CMP_LIST entry (the list is defined outside this chunk),
// defines v##instr##ps and v##instr##pd with XMMRegister and Operand
// overloads for src2. Each forwards to vcmpps()/vcmppd() with the entry's
// imm8 as the comparison selector. Both macros are undefined after use.
1595 #define AVX_CMP_P(instr, imm8)                                             \
1596   void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1597     vcmpps(dst, src1, Operand(src2), imm8);                                \
1598   }                                                                        \
1599   void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1600     vcmpps(dst, src1, src2, imm8);                                         \
1601   }                                                                        \
1602   void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1603     vcmppd(dst, src1, Operand(src2), imm8);                                \
1604   }                                                                        \
1605   void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1606     vcmppd(dst, src1, src2, imm8);                                         \
1607   }
1608 
1609   PACKED_CMP_LIST(AVX_CMP_P)
1610 #undef AVX_CMP_P
1611 #undef PACKED_CMP_LIST
1612 
1613 // Other SSE and AVX instructions
// For each SSE2_INSTRUCTION_LIST entry (the list is defined outside this
// chunk), defines a two-operand SSE2 method with XMMRegister and Operand
// source overloads. The prefix, escape and opcode arguments are bare hex
// digits; the macro pastes "0x" in front of them before calling sse2_instr().
1614 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1615   void instruction(XMMRegister dst, XMMRegister src) {                \
1616     instruction(dst, Operand(src));                                   \
1617   }                                                                   \
1618   void instruction(XMMRegister dst, Operand src) {                    \
1619     sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
1620   }
1621 
1622   SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
1623 #undef DECLARE_SSE2_INSTRUCTION
1624 
// AVX three-operand variants of the SSE2_INSTRUCTION_LIST entries, named
// v<instruction>. The prefix and escape digits are pasted after "k" to pick
// the SIMDPrefix / LeadingOpcode enumerators (e.g. 66 -> k66, 0F -> k0F), and
// the instruction is emitted through vinstr() with VEX.W = 0.
1625 #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
1626   void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1627     v##instruction(dst, src1, Operand(src2));                                \
1628   }                                                                          \
1629   void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
1630     vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
1631   }
1632 
1633   SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
1634 #undef DECLARE_SSE2_AVX_INSTRUCTION
1635 
// For each entry of SSSE3_INSTRUCTION_LIST and SSSE3_UNOP_INSTRUCTION_LIST
// (both defined outside this chunk), defines a two-operand SSSE3 method with
// XMMRegister and Operand source overloads. The prefix, two escape bytes and
// opcode are passed to ssse3_instr() as 0x-prefixed constants.
1636 #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
1637                                   opcode)                                    \
1638   void instruction(XMMRegister dst, XMMRegister src) {                       \
1639     instruction(dst, Operand(src));                                          \
1640   }                                                                          \
1641   void instruction(XMMRegister dst, Operand src) {                           \
1642     ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1643   }
1644 
1645   SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1646   SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1647 #undef DECLARE_SSSE3_INSTRUCTION
1648 
// For each entry of SSE4_INSTRUCTION_LIST and SSE4_RM_INSTRUCTION_LIST (both
// defined outside this chunk), defines a two-operand SSE4 method with
// XMMRegister and Operand source overloads. The prefix, two escape bytes and
// opcode are passed to sse4_instr() as 0x-prefixed constants.
1649 #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
1650                                  opcode)                                    \
1651   void instruction(XMMRegister dst, XMMRegister src) {                      \
1652     instruction(dst, Operand(src));                                         \
1653   }                                                                         \
1654   void instruction(XMMRegister dst, Operand src) {                          \
1655     sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1656   }
1657 
1658   SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1659   SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1660 #undef DECLARE_SSE4_INSTRUCTION
1661 
// AVX three-operand variants (v<instruction>) of the SSSE3_INSTRUCTION_LIST
// and SSE4_INSTRUCTION_LIST entries. The two escape digits are pasted
// together after "k" to name the LeadingOpcode enumerator (e.g. 0F and 38
// give k0F38); the instruction is emitted through vinstr() with VEX.W = 0.
1662 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
1663                                       opcode)                                 \
1664   void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
1665     v##instruction(dst, src1, Operand(src2));                                 \
1666   }                                                                           \
1667   void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
1668     vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1669   }
1670 
1671   SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1672   SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1673 #undef DECLARE_SSE34_AVX_INSTRUCTION
1674 
// AVX two-operand variants (v<instruction>) of the SSSE3_UNOP_INSTRUCTION_LIST
// and SSE4_RM_INSTRUCTION_LIST entries. These take a single source, so xmm0
// is passed in vinstr()'s src1 slot.
1675 #define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
1676                                         opcode)                                \
1677   void v##instruction(XMMRegister dst, XMMRegister src) {                      \
1678     v##instruction(dst, Operand(src));                                         \
1679   }                                                                            \
1680   void v##instruction(XMMRegister dst, Operand src) {                          \
1681     vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0);   \
1682   }
1683 
1684   SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
1685   SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
1686 #undef DECLARE_SSE4_AVX_RM_INSTRUCTION
1687 
1688   // Prefetch the memory at src into the cache.
1689   // Level 1, 2 or 3 specifies the CPU cache level. Level 0 specifies a
1690   // non-temporal prefetch.
1691   void prefetch(Operand src, int level);
1692   // TODO(lrn): Need SFENCE for movnt?
1693 
1694   // Check the code size generated from label to here.
SizeOfCodeGeneratedSince(Label * label)1695   int SizeOfCodeGeneratedSince(Label* label) {
1696     return pc_offset() - label->pos();
1697   }
1698 
1699   // Record a deoptimization reason that can be used by a log or cpu profiler.
1700   // Use --trace-deopt to enable.
// Takes the reason, the source position and an integer id. Declaration only;
// defined outside this chunk.
1701   void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
1702                          int id);
1703 
1704   // Writes a single byte or word of data in the code stream.  Used for
1705   // inline tables, e.g., jump-tables.
1706   void db(uint8_t data);
1707   void dd(uint32_t data);
1708   void dq(uint64_t data);
dp(uintptr_t data)1709   void dp(uintptr_t data) { dd(data); }
1710   void dd(Label* label);
1711 
1712   // Check if there is less than kGap bytes available in the buffer.
1713   // If this is the case, we need to grow the buffer before emitting
1714   // an instruction or relocation information.
buffer_overflow()1715   inline bool buffer_overflow() const {
1716     return pc_ >= reloc_info_writer.pos() - kGap;
1717   }
1718 
1719   // Get the number of bytes available in the buffer.
available_space()1720   inline int available_space() const { return reloc_info_writer.pos() - pc_; }
1721 
// NOTE(review): presumably reports whether the instruction at addr is a nop;
// the definition is outside this chunk -- confirm there.
1722   static bool IsNop(Address addr);
1723 
relocation_writer_size()1724   int relocation_writer_size() {
1725     return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos();
1726   }
1727 
1728   // Upper bound on the assembler buffer size (512 MB); avoids overflows for
// displacements etc.
1729   static constexpr int kMaximalBufferSize = 512 * MB;
1730 
byte_at(int pos)1731   byte byte_at(int pos) { return buffer_start_[pos]; }
set_byte_at(int pos,byte value)1732   void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }
1733 
1734  protected:
// Operand emitters for SSE instructions, overloaded on the register kinds
// involved (XMM/Operand, XMM/XMM, GPR/XMM, XMM/GPR). Declarations only;
// defined outside this chunk.
1735   void emit_sse_operand(XMMRegister reg, Operand adr);
1736   void emit_sse_operand(XMMRegister dst, XMMRegister src);
1737   void emit_sse_operand(Register dst, XMMRegister src);
1738   void emit_sse_operand(XMMRegister dst, Register src);
1739 
addr_at(int pos)1740   Address addr_at(int pos) {
1741     return reinterpret_cast<Address>(buffer_start_ + pos);
1742   }
1743 
1744  private:
long_at(int pos)1745   uint32_t long_at(int pos) {
1746     return ReadUnalignedValue<uint32_t>(addr_at(pos));
1747   }
long_at_put(int pos,uint32_t x)1748   void long_at_put(int pos, uint32_t x) {
1749     WriteUnalignedValue(addr_at(pos), x);
1750   }
1751 
1752   // Code emission. GrowBuffer() is what EnsureSpace calls when
// buffer_overflow() reports that the buffer is nearly full. The emit*
// overloads are declared inline here and defined outside this chunk.
1753   void GrowBuffer();
1754   inline void emit(uint32_t x);
1755   inline void emit(Handle<HeapObject> handle);
1756   inline void emit(uint32_t x, RelocInfo::Mode rmode);
1757   inline void emit(Handle<Code> code, RelocInfo::Mode rmode);
1758   inline void emit(const Immediate& x);
1759   inline void emit_b(Immediate x);
1760   inline void emit_w(const Immediate& x);
1761   inline void emit_q(uint64_t x);
1762 
1763   // Emit the code-object-relative offset of the label's position.
1764   inline void emit_code_relative_offset(Label* label);
1765 
1766   // Instruction generation helpers (declarations only).
1767   void emit_arith_b(int op1, int op2, Register dst, int imm8);
1768 
1769   // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
1770   // with a given destination expression and an immediate operand.  It attempts
1771   // to use the shortest encoding possible.
1772   // sel specifies the /n in the modrm byte (see the Intel PRM).
1773   void emit_arith(int sel, Operand dst, const Immediate& x);
1774 
// emit_operand overloads: the first argument (an int code, a Register or an
// XMMRegister) accompanies the addressing operand adr.
1775   void emit_operand(int code, Operand adr);
1776   void emit_operand(Register reg, Operand adr);
1777   void emit_operand(XMMRegister reg, Operand adr);
1778 
1779   void emit_label(Label* label);
1780 
1781   void emit_farith(int b1, int b2, int i);
1782 
1783   // VEX prefix emission. The enums below name the VEX prefix fields.
// SIMDPrefix: the implied legacy prefix (none, 66, F3, F2).
1784   enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
// VectorLength: kLIG and kLZ are aliases of kL128, so they encode as 0.
1785   enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
// VexW: kWIG is an alias of kW0, so it encodes as 0.
1786   enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
// LeadingOpcode: the opcode map (0F, 0F38 or 0F3A).
1787   enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
// Two overloads: v is either an XMM or a general-purpose register.
1788   inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
1789                               LeadingOpcode m, VexW w);
1790   inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
1791                               LeadingOpcode m, VexW w);
1792 
1793   // Label helpers (declarations only).
1794   void print(const Label* L);
1795   void bind_to(Label* L, int pos);
1796 
1797   // Displacement helpers for label references (declarations only).
1798   inline Displacement disp_at(Label* L);
1799   inline void disp_at_put(Label* L, Displacement disp);
1800   inline void emit_disp(Label* L, Displacement::Type type);
1801   inline void emit_near_disp(Label* L);
1802 
// Shared emitters behind the macro-generated SSE2/SSSE3/SSE4 methods; they
// take the prefix, escape byte(s) and opcode as raw bytes.
1803   void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
1804                   byte opcode);
1805   void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1806                    byte escape2, byte opcode);
1807   void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1808                   byte escape2, byte opcode);
// Shared emitter behind the AVX methods: opcode, the three operands, then the
// SIMD prefix, opcode map and VEX.W selectors.
1809   void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
1810               SIMDPrefix pp, LeadingOpcode m, VexW w);
1811   // Most BMI instructions are similar.
1812   void bmi1(byte op, Register reg, Register vreg, Operand rm);
1813   void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
1814 
1815   // Record reloc info for the current pc_; data defaults to 0.
1816   void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1817 
1818   // Record the position of a jmp/jcc instruction.
1819   void record_farjmp_position(Label* L, int pos);
1820 
1821   bool is_optimizable_farjmp(int idx);
1822 
1823   void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
1824 
1825   int WriteCodeComments();
1826 
// EnsureSpace needs access to the private GrowBuffer().
1827   friend class EnsureSpace;
1828 
1829   // Internal reference positions, required for (potential) patching in
1830   // GrowBuffer(); contains only those internal references whose labels
1831   // are already bound.
1832   std::deque<int> internal_reference_positions_;
1833 
1834   // Code generation: writer for relocation info. Its pos() is compared with
// pc_ by buffer_overflow() and available_space().
1835   RelocInfoWriter reloc_info_writer;
1836 
1837   // Far-jump bookkeeping for this assembler instance.
1838   int farjmp_num_ = 0;
1839   std::deque<int> farjmp_positions_;
1840   std::map<Label*, std::vector<int>> label_farjmp_maps_;
1841 };
1842 
1843 // Helper class that ensures that there is enough space for generating
1844 // instructions and relocation information.  The constructor makes
1845 // sure that there is enough space and (in debug mode) the destructor
1846 // checks that we did not generate too much.
1847 class EnsureSpace {
1848  public:
EnsureSpace(Assembler * assembler)1849   explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1850     if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
1851 #ifdef DEBUG
1852     space_before_ = assembler_->available_space();
1853 #endif
1854   }
1855 
1856 #ifdef DEBUG
~EnsureSpace()1857   ~EnsureSpace() {
1858     int bytes_generated = space_before_ - assembler_->available_space();
1859     DCHECK(bytes_generated < assembler_->kGap);
1860   }
1861 #endif
1862 
1863  private:
1864   Assembler* assembler_;
1865 #ifdef DEBUG
1866   int space_before_;
1867 #endif
1868 };
1869 
1870 }  // namespace internal
1871 }  // namespace v8
1872 
1873 #endif  // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
1874