// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2011 the V8 project authors. All rights reserved.

// A light-weight IA32 Assembler.

#ifndef V8_CODEGEN_IA32_ASSEMBLER_IA32_H_
#define V8_CODEGEN_IA32_ASSEMBLER_IA32_H_

#include <deque>
#include <memory>

#include "src/codegen/assembler.h"
#include "src/codegen/ia32/constants-ia32.h"
#include "src/codegen/ia32/fma-instr.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/ia32/sse-instr.h"
#include "src/codegen/label.h"
#include "src/execution/isolate.h"
#include "src/objects/smi.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

class SafepointTableBuilder;

enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}
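
// A minimal usage sketch (illustrative only): each condition and its negation
// differ only in the lowest bit of their encoding, which is why
// NegateCondition can simply XOR with 1. For example:
//   NegateCondition(equal)   == not_equal    // 4 ^ 1 == 5
//   NegateCondition(below)   == above_equal  // 2 ^ 1 == 3
//   NegateCondition(greater) == less_equal   // 15 ^ 1 == 14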

enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
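
// Usage sketch (illustrative only, assuming an Assembler instance `masm`):
// these values are passed as the rounding-mode immediate of the SSE4.1/AVX
// rounding instructions declared below, e.g.
//   masm.roundsd(xmm1, xmm2, kRoundToZero);  // truncate toward zero
//   masm.roundps(xmm0, xmm0, kRoundDown);    // round each lane down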

// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate {
 public:
  // Calls where x is an Address (uintptr_t) resolve to this overload.
  inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    value_.immediate = x;
    rmode_ = rmode;
  }
  inline explicit Immediate(const ExternalReference& ext)
      : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
  inline explicit Immediate(Handle<HeapObject> handle)
      : Immediate(handle.address(), RelocInfo::FULL_EMBEDDED_OBJECT) {}
  inline explicit Immediate(Smi value)
      : Immediate(static_cast<intptr_t>(value.ptr())) {}

  static Immediate EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Immediate EmbeddedStringConstant(const StringConstantBase* str);

  static Immediate CodeRelativeOffset(Label* label) { return Immediate(label); }

  bool is_heap_object_request() const {
    DCHECK_IMPLIES(is_heap_object_request_,
                   rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
                       rmode_ == RelocInfo::CODE_TARGET);
    return is_heap_object_request_;
  }

  HeapObjectRequest heap_object_request() const {
    DCHECK(is_heap_object_request());
    return value_.heap_object_request;
  }

  int immediate() const {
    DCHECK(!is_heap_object_request());
    return value_.immediate;
  }

  bool is_embedded_object() const {
    return !is_heap_object_request() &&
           rmode() == RelocInfo::FULL_EMBEDDED_OBJECT;
  }

  Handle<HeapObject> embedded_object() const {
    return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
  }

  bool is_external_reference() const {
    return rmode() == RelocInfo::EXTERNAL_REFERENCE;
  }

  ExternalReference external_reference() const {
    DCHECK(is_external_reference());
    return bit_cast<ExternalReference>(immediate());
  }

  bool is_zero() const {
    return RelocInfo::IsNoInfo(rmode_) && immediate() == 0;
  }
  bool is_int8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int8(immediate());
  }
  bool is_uint8() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint8(immediate());
  }
  bool is_int16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_int16(immediate());
  }

  bool is_uint16() const {
    return RelocInfo::IsNoInfo(rmode_) && i::is_uint16(immediate());
  }

  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  inline explicit Immediate(Label* value) {
    value_.immediate = reinterpret_cast<int32_t>(value);
    rmode_ = RelocInfo::INTERNAL_REFERENCE;
  }

  union Value {
    Value() {}
    HeapObjectRequest heap_object_request;
    int immediate;
  } value_;
  bool is_heap_object_request_ = false;
  RelocInfo::Mode rmode_;

  friend class Operand;
  friend class Assembler;
  friend class MacroAssembler;
};
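
// Construction sketch (illustrative only, assuming an Assembler instance
// `masm`): an Immediate carries both the 32-bit payload and its RelocInfo
// mode, so relocatable values can be used wherever a plain constant fits:
//   masm.mov(eax, Immediate(42));                  // plain int, NO_INFO
//   masm.mov(eax, Immediate(ext));                 // ext: an ExternalReference
//   masm.cmp(eax, Immediate(Smi::FromInt(0)));     // Smi payload, no reloc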

// -----------------------------------------------------------------------------
// Machine instruction Operands

enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  times_half_system_pointer_size = times_2,
  times_system_pointer_size = times_4,

  times_tagged_size = times_4,
};

class V8_EXPORT_PRIVATE Operand {
 public:
  // reg
  V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }

  // XMM reg
  V8_INLINE explicit Operand(XMMRegister xmm_reg) {
    Register reg = Register::from_code(xmm_reg.code());
    set_modrm(3, reg);
  }

  // [disp/r]
  V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
    set_modrm(0, ebp);
    set_dispr(disp, rmode);
  }

  // [disp/r]
  V8_INLINE explicit Operand(Immediate imm) {
    set_modrm(0, ebp);
    set_dispr(imm.immediate(), imm.rmode_);
  }

  // [base + disp/r]
  explicit Operand(Register base, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [rip + disp/r]
  explicit Operand(Label* label) {
    set_modrm(0, ebp);
    set_dispr(reinterpret_cast<intptr_t>(label), RelocInfo::INTERNAL_REFERENCE);
  }

  // [base + index*scale + disp/r]
  explicit Operand(Register base, Register index, ScaleFactor scale,
                   int32_t disp, RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  // [index*scale + disp/r]
  explicit Operand(Register index, ScaleFactor scale, int32_t disp,
                   RelocInfo::Mode rmode = RelocInfo::NO_INFO);

  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
    return Operand(index, scale, reinterpret_cast<int32_t>(table),
                   RelocInfo::INTERNAL_REFERENCE);
  }

  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
    return Operand(base, imm.value_.immediate, imm.rmode_);
  }

  // Returns true if this Operand is a wrapper for the specified register.
  bool is_reg(Register reg) const { return is_reg(reg.code()); }
  bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }

  // Returns true if this Operand is a wrapper for one register.
  bool is_reg_only() const;

  // Asserts that this Operand is a wrapper for one register and returns the
  // register.
  Register reg() const;

  base::Vector<const byte> encoded_bytes() const { return {buf_, len_}; }
  RelocInfo::Mode rmode() { return rmode_; }

 private:
  // Set the ModRM byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  inline void set_modrm(int mod, Register rm) {
    DCHECK_EQ(mod & -4, 0);
    buf_[0] = mod << 6 | rm.code();
    len_ = 1;
  }

  inline void set_sib(ScaleFactor scale, Register index, Register base);
  inline void set_disp8(int8_t disp);
  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
    DCHECK(len_ == 1 || len_ == 2);
    Address p = reinterpret_cast<Address>(&buf_[len_]);
    WriteUnalignedValue(p, disp);
    len_ += sizeof(int32_t);
    rmode_ = rmode;
  }

  inline bool is_reg(int reg_code) const {
    return ((buf_[0] & 0xF8) == 0xC0)       // addressing mode is register only.
           && ((buf_[0] & 0x07) == reg_code);  // register codes match.
  }

  byte buf_[6];
  // The number of bytes in buf_.
  uint8_t len_ = 0;
  // Only valid if len_ > 4.
  RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;
};
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");

bool operator!=(Operand op, XMMRegister r);

// -----------------------------------------------------------------------------
// A Displacement describes the 32bit immediate field of an instruction which
// may be used together with a Label in order to refer to a yet unknown code
// position. Displacements stored in the instruction stream are used to describe
// the instruction and to chain a list of instructions using the same Label.
// A Displacement contains 2 different fields:
//
// next field: position of next displacement in the chain (0 = end of list)
// type field: instruction type
//
// A next value of null (0) indicates the end of a chain (note that there can
// be no displacement at position zero, because there is always at least one
// instruction byte before the displacement).
//
// Displacement _data field layout
//
// |31.....2|1......0|
// |  next  |  type  |
class Displacement {
 public:
  enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };

  int data() const { return data_; }
  Type type() const { return TypeField::decode(data_); }
  void next(Label* L) const {
    int n = NextField::decode(data_);
    n > 0 ? L->link_to(n) : L->Unuse();
  }
  void link_to(Label* L) { init(L, type()); }

  explicit Displacement(int data) { data_ = data; }

  Displacement(Label* L, Type type) { init(L, type); }

  void print() {
    PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
           NextField::decode(data_));
  }

 private:
  int data_;

  using TypeField = base::BitField<Type, 0, 2>;
  using NextField = base::BitField<int, 2, 32 - 2>;

  void init(Label* L, Type type);
};
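
// Encoding sketch (illustrative only): the low two bits of data_ hold the
// Type and the upper 30 bits hold the position of the next displacement in
// the label's chain (0 terminates the chain). For example, a data_ value of
//   (0x42 << 2) | UNCONDITIONAL_JUMP
// describes an unconditional jump whose chain continues at position 0x42.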

class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible ia32 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on ia32 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static constexpr int kGap = 32;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is nullptr, the assembler allocates and grows its
  // own buffer. Otherwise it takes ownership of the provided buffer.
  explicit Assembler(const AssemblerOptions&,
                     std::unique_ptr<AssemblerBuffer> = {});

  // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
  static constexpr int kNoHandlerTable = 0;
  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
  void GetCode(Isolate* isolate, CodeDesc* desc,
               SafepointTableBuilder* safepoint_table_builder,
               int handler_table_offset);

  // Convenience wrapper for code without safepoint or handler tables.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }

  void FinalizeJumpOptimizationInfo();

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Read/Modify the code target in the branch/call instruction at pc.
  // The isolate argument is unused (and may be nullptr) when skipping flushing.
  inline static Address target_address_at(Address pc, Address constant_pool);
  inline static void set_target_address_at(
      Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

  // This sets the branch destination (which is in the instruction on x86).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code code, Address target);

  // Get the size of the special target encoded at 'instruction_payload'.
  inline static int deserialization_special_target_size(
      Address instruction_payload);

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  static constexpr int kSpecialTargetSize = kSystemPointerSize;

  // One byte opcode for test al, 0xXX.
  static constexpr byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static constexpr byte kNopByte = 0x90;

  // One byte opcode for a short unconditional jump.
  static constexpr byte kJmpShortOpcode = 0xEB;
  // One byte prefix for a short conditional jump.
  static constexpr byte kJccShortPrefix = 0x70;
  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
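
  // Worked example (informational): a short conditional jump opcode is
  // kJccShortPrefix | condition, so the constants above expand to the usual
  // one-byte x86 opcodes:
  //   kJcShortOpcode  == 0x70 | carry     == 0x72  // jc/jb   rel8
  //   kJncShortOpcode == 0x70 | not_carry == 0x73  // jnc/jae rel8
  //   kJzShortOpcode  == 0x70 | zero      == 0x74  // jz/je   rel8
  //   kJnzShortOpcode == 0x70 | not_zero  == 0x75  // jnz/jne rel8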

  // ---------------------------------------------------------------------------
  // Code generation
  //
  // - function names correspond one-to-one to ia32 instruction mnemonics
  // - unless specified otherwise, instructions operate on 32bit operands
  // - instructions on 8bit (byte) operands/registers have a trailing '_b'
  // - instructions on 16bit (word) operands/registers have a trailing '_w'
  // - naming conflicts with C++ keywords are resolved via a trailing '_'

  // NOTE ON INTERFACE: Currently, the interface is not very consistent
  // in the sense that some operations (e.g. mov()) can be called in more
  // than one way to generate the same instruction: The Register argument
  // can in some cases be replaced with an Operand(Register) argument.
  // This should be cleaned up and made more orthogonal. The question
  // is: should we always use Operands instead of Registers where an
  // Operand is possible, or should we have a Register (overloaded) form
  // instead? We must be careful to make sure that the selected instruction
  // is obvious from the parameters to avoid hard-to-find code generation
  // bugs.
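
  // Usage sketch of the naming conventions above (illustrative only, assuming
  // an Assembler instance `masm`):
  //   masm.mov(eax, Operand(esp, 4));      // 32-bit move
  //   masm.mov_b(Operand(eax, 0), edx);    // byte move ('_b' suffix, uses dl)
  //   masm.mov_w(Operand(eax, 0), ecx);    // word move ('_w' suffix, uses cx)
  //   masm.and_(eax, 0xFF);                // trailing '_' avoids keyword 'and'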

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m. m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();
  void LoopHeaderAlign() { CodeTargetAlign(); }

  // Stack
  void pushad();
  void popad();

  void pushfd();
  void popfd();

  void push(const Immediate& x);
  void push_imm32(int32_t imm32);
  void push(Register src);
  void push(Operand src);

  void pop(Register dst);
  void pop(Operand dst);

  void leave();

  // Moves
  void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
  void mov_b(Register dst, Operand src);
  void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
  void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); }
  void mov_b(Operand dst, const Immediate& src);
  void mov_b(Operand dst, Register src);

  void mov_w(Register dst, Operand src);
  void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); }
  void mov_w(Operand dst, const Immediate& src);
  void mov_w(Operand dst, Register src);

  void mov(Register dst, int32_t imm32);
  void mov(Register dst, const Immediate& x);
  void mov(Register dst, Handle<HeapObject> handle);
  void mov(Register dst, Operand src);
  void mov(Register dst, Register src);
  void mov(Operand dst, const Immediate& x);
  void mov(Operand dst, Handle<HeapObject> handle);
  void mov(Operand dst, Register src);
  void mov(Operand dst, Address src, RelocInfo::Mode);

  void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
  void movsx_b(Register dst, Operand src);

  void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
  void movsx_w(Register dst, Operand src);

  void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
  void movzx_b(Register dst, Operand src);

  void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
  void movzx_w(Register dst, Operand src);

  void movq(XMMRegister dst, Operand src);
  void movq(Operand dst, XMMRegister src);

  // Conditional moves
  void cmov(Condition cc, Register dst, Register src) {
    cmov(cc, dst, Operand(src));
  }
  void cmov(Condition cc, Register dst, Operand src);

  // Flag management.
  void cld();

  // Repetitive string instructions.
  void rep_movs();
  void rep_stos();
  void stos();

  void xadd(Operand dst, Register src);
  void xadd_b(Operand dst, Register src);
  void xadd_w(Operand dst, Register src);

  // Exchange
  void xchg(Register dst, Register src);
  void xchg(Register dst, Operand src);
  void xchg_b(Register reg, Operand op);
  void xchg_w(Register reg, Operand op);

  // Lock prefix
  void lock();

  // CompareExchange
  void cmpxchg(Operand dst, Register src);
  void cmpxchg_b(Operand dst, Register src);
  void cmpxchg_w(Operand dst, Register src);
  void cmpxchg8b(Operand dst);

  // Memory Fence
  void mfence();
  void lfence();

  void pause();

  // Arithmetics
  void adc(Register dst, int32_t imm32);
  void adc(Register dst, Register src) { adc(dst, Operand(src)); }
  void adc(Register dst, Operand src);

  void add(Register dst, Register src) { add(dst, Operand(src)); }
  void add(Register dst, Operand src);
  void add(Operand dst, Register src);
  void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
  void add(Operand dst, const Immediate& x);

  void and_(Register dst, int32_t imm32);
  void and_(Register dst, const Immediate& x);
  void and_(Register dst, Register src) { and_(dst, Operand(src)); }
  void and_(Register dst, Operand src);
  void and_(Operand dst, Register src);
  void and_(Operand dst, const Immediate& x);

  void cmpb(Register reg, Immediate imm8) {
    DCHECK(reg.is_byte_register());
    cmpb(Operand(reg), imm8);
  }
  void cmpb(Operand op, Immediate imm8);
  void cmpb(Register reg, Operand op);
  void cmpb(Operand op, Register reg);
  void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
  void cmpb_al(Operand op);
  void cmpw_ax(Operand op);
  void cmpw(Operand dst, Immediate src);
  void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
  void cmpw(Register dst, Operand src);
  void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
  void cmpw(Operand dst, Register src);
  void cmp(Register reg, int32_t imm32);
  void cmp(Register reg, Handle<HeapObject> handle);
  void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
  void cmp(Register reg, Operand op);
  void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
  void cmp(Operand op, Register reg);
  void cmp(Operand op, const Immediate& imm);
  void cmp(Operand op, Handle<HeapObject> handle);

  void dec_b(Register dst);
  void dec_b(Operand dst);

  void dec(Register dst);
  void dec(Operand dst);

  void cdq();

  void idiv(Register src) { idiv(Operand(src)); }
  void idiv(Operand src);
  void div(Register src) { div(Operand(src)); }
  void div(Operand src);

  // Signed multiply instructions.
  void imul(Register src);                               // edx:eax = eax * src.
  void imul(Register dst, Register src) { imul(dst, Operand(src)); }
  void imul(Register dst, Operand src);                  // dst = dst * src.
  void imul(Register dst, Register src, int32_t imm32);  // dst = src * imm32.
  void imul(Register dst, Operand src, int32_t imm32);

  void inc(Register dst);
  void inc(Operand dst);

  void lea(Register dst, Operand src);

  // Unsigned multiply instruction.
  void mul(Register src);  // edx:eax = eax * reg.

  void neg(Register dst);
  void neg(Operand dst);

  void not_(Register dst);
  void not_(Operand dst);

  void or_(Register dst, int32_t imm32);
  void or_(Register dst, Register src) { or_(dst, Operand(src)); }
  void or_(Register dst, Operand src);
  void or_(Operand dst, Register src);
  void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
  void or_(Operand dst, const Immediate& x);

  void rcl(Register dst, uint8_t imm8);
  void rcr(Register dst, uint8_t imm8);

  void rol(Register dst, uint8_t imm8) { rol(Operand(dst), imm8); }
  void rol(Operand dst, uint8_t imm8);
  void rol_cl(Register dst) { rol_cl(Operand(dst)); }
  void rol_cl(Operand dst);

  void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
  void ror(Operand dst, uint8_t imm8);
  void ror_cl(Register dst) { ror_cl(Operand(dst)); }
  void ror_cl(Operand dst);

  void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
  void sar(Operand dst, uint8_t imm8);
  void sar_cl(Register dst) { sar_cl(Operand(dst)); }
  void sar_cl(Operand dst);

  void sbb(Register dst, Register src) { sbb(dst, Operand(src)); }
  void sbb(Register dst, Operand src);

  void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
  void shl(Operand dst, uint8_t imm8);
  void shl_cl(Register dst) { shl_cl(Operand(dst)); }
  void shl_cl(Operand dst);
  void shld(Register dst, Register src, uint8_t shift);
  void shld_cl(Register dst, Register src);

  void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
  void shr(Operand dst, uint8_t imm8);
  void shr_cl(Register dst) { shr_cl(Operand(dst)); }
  void shr_cl(Operand dst);
  void shrd(Register dst, Register src, uint8_t shift);
  void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
  void shrd_cl(Operand dst, Register src);

  void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
  void sub(Operand dst, const Immediate& x);
  void sub(Register dst, Register src) { sub(dst, Operand(src)); }
  void sub(Register dst, Operand src);
  void sub(Operand dst, Register src);
  void sub_sp_32(uint32_t imm);

  void test(Register reg, const Immediate& imm);
  void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
  void test(Register reg, Operand op);
  void test(Operand op, const Immediate& imm);
  void test(Operand op, Register reg) { test(reg, op); }
  void test_b(Register reg, Operand op);
  void test_b(Register reg, Immediate imm8);
  void test_b(Operand op, Immediate imm8);
  void test_b(Operand op, Register reg) { test_b(reg, op); }
  void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
  void test_w(Register reg, Operand op);
  void test_w(Register reg, Immediate imm16);
  void test_w(Operand op, Immediate imm16);
  void test_w(Operand op, Register reg) { test_w(reg, op); }
  void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }

  void xor_(Register dst, int32_t imm32);
  void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
  void xor_(Register dst, Operand src);
  void xor_(Operand dst, Register src);
  void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
  void xor_(Operand dst, const Immediate& x);

  // Bit operations.
  void bswap(Register dst);
  void bt(Operand dst, Register src);
  void bts(Register dst, Register src) { bts(Operand(dst), src); }
  void bts(Operand dst, Register src);
  void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
  void bsr(Register dst, Operand src);
  void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
  void bsf(Register dst, Operand src);

  // Miscellaneous
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
  void ud2();

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  void call(Register reg) { call(Operand(reg)); }
  void call(Operand adr);
  void call(Handle<Code> code, RelocInfo::Mode rmode);
  void wasm_call(Address address, RelocInfo::Mode rmode);

  // Jumps
  // unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Address entry, RelocInfo::Mode rmode);
  void jmp(Register reg) { jmp(Operand(reg)); }
  void jmp(Operand adr);
  void jmp(Handle<Code> code, RelocInfo::Mode rmode);
  // Unconditional jump relative to the current address. Low-level routine,
  // use with caution!
  void jmp_rel(int offset);

  // Conditional jumps
  void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
  void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> code,
         RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);

  // Floating-point operations
  void fld(int i);
  void fstp(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(Operand adr);
  void fld_d(Operand adr);

  void fstp_s(Operand adr);
  void fst_s(Operand adr);
  void fstp_d(Operand adr);
  void fst_d(Operand adr);

  void fild_s(Operand adr);
  void fild_d(Operand adr);

  void fist_s(Operand adr);

  void fistp_s(Operand adr);
  void fistp_d(Operand adr);

  // The fisttp instructions require SSE3.
  void fisttp_s(Operand adr);
  void fisttp_d(Operand adr);

  void fabs();
  void fchs();
  void fcos();
  void fsin();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void fadd(int i);
  void fadd_i(int i);
  void fsub(int i);
  void fsub_i(int i);
  void fmul(int i);
  void fmul_i(int i);
  void fdiv(int i);
  void fdiv_i(int i);

  void fisub_s(Operand adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();
  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void frndint();

  void sahf();
  void setcc(Condition cc, Register reg);

  void cpuid();

  // SSE instructions
  void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); }
  void addss(XMMRegister dst, Operand src);
  void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
  void subss(XMMRegister dst, Operand src);
  void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
  void mulss(XMMRegister dst, Operand src);
  void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
  void divss(XMMRegister dst, Operand src);
  void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
  void sqrtss(XMMRegister dst, Operand src);

  void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src) { movaps(dst, Operand(src)); }
  void movaps(XMMRegister dst, Operand src);
  void movups(XMMRegister dst, XMMRegister src) { movups(dst, Operand(src)); }
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
  void shufpd(XMMRegister dst, XMMRegister src, byte imm8);

  void movhlps(XMMRegister dst, XMMRegister src);
  void movlhps(XMMRegister dst, XMMRegister src);
  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);
  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
  void maxss(XMMRegister dst, Operand src);
  void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
  void minss(XMMRegister dst, Operand src);

  void haddps(XMMRegister dst, Operand src);
  void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
  void sqrtpd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x51);
  }
  void sqrtpd(XMMRegister dst, XMMRegister src) { sqrtpd(dst, Operand(src)); }

  void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
  void cmpps(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmpps(dst, Operand(src), cmp);
  }
  void cmppd(XMMRegister dst, Operand src, uint8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, uint8_t cmp) {
    cmppd(dst, Operand(src), cmp);
  }

  // Packed floating-point comparison operations.
#define PACKED_CMP_LIST(V) \
  V(cmpeq, 0x0)            \
  V(cmplt, 0x1)            \
  V(cmple, 0x2)            \
  V(cmpunord, 0x3)         \
  V(cmpneq, 0x4)

#define SSE_CMP_P(instr, imm8)                                            \
  void instr##ps(XMMRegister dst, XMMRegister src) {                      \
    cmpps(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, XMMRegister src) {                      \
    cmppd(dst, Operand(src), imm8);                                       \
  }                                                                       \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  PACKED_CMP_LIST(SSE_CMP_P)
#undef SSE_CMP_P
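
  // Expansion sketch (informational): the PACKED_CMP_LIST/SSE_CMP_P pair above
  // generates the predicated comparison helpers cmpeqps, cmpltps, cmpleps,
  // cmpunordps, cmpneqps and their 'pd' counterparts, each forwarding to
  // cmpps/cmppd with the corresponding immediate, e.g. (with an Assembler
  // instance `masm`):
  //   masm.cmpeqps(xmm0, xmm1);   // == masm.cmpps(xmm0, Operand(xmm1), 0x0);
  //   masm.cmpltpd(xmm2, xmm3);   // == masm.cmppd(xmm2, Operand(xmm3), 0x1);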

  // SSE2 instructions
  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src) {
    cvttss2si(dst, Operand(src));
  }
  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src) {
    cvttsd2si(dst, Operand(src));
  }
  void cvtsd2si(Register dst, XMMRegister src);

  void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
  void cvtsi2ss(XMMRegister dst, Operand src);
  void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
  void cvtsi2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, Operand src);
  void cvtss2sd(XMMRegister dst, XMMRegister src) {
    cvtss2sd(dst, Operand(src));
  }
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void cvtpd2ps(XMMRegister dst, XMMRegister src);
  void cvttps2dq(XMMRegister dst, XMMRegister src) {
    cvttps2dq(dst, Operand(src));
  }
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
  void ucomisd(XMMRegister dst, Operand src);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void movapd(XMMRegister dst, XMMRegister src) { movapd(dst, Operand(src)); }
  void movapd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x28);
  }
  void movupd(XMMRegister dst, Operand src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0x10);
  }

  void movmskpd(Register dst, XMMRegister src);
  void movmskps(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movdqa(XMMRegister dst, Operand src);
  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, XMMRegister src);
  void movdq(bool aligned, XMMRegister dst, Operand src) {
    if (aligned) {
      movdqa(dst, src);
    } else {
      movdqu(dst, src);
    }
  }

  void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
  void movd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
  void movsd(XMMRegister dst, Operand src);
  void movsd(Operand dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);
  void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }

  void extractps(Operand dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);

  void pcmpgtq(XMMRegister dst, XMMRegister src);

  void psllw(XMMRegister reg, uint8_t shift);
  void pslld(XMMRegister reg, uint8_t shift);
  void psrlw(XMMRegister reg, uint8_t shift);
  void psrld(XMMRegister reg, uint8_t shift);
  void psraw(XMMRegister reg, uint8_t shift);
  void psrad(XMMRegister reg, uint8_t shift);
  void psllq(XMMRegister reg, uint8_t shift);
  void psrlq(XMMRegister reg, uint8_t shift);

  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufhw(dst, Operand(src), shuffle);
  }
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshuflw(dst, Operand(src), shuffle);
  }
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    pshufd(dst, Operand(src), shuffle);
  }
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
    pblendw(dst, Operand(src), mask);
  }
  void pblendw(XMMRegister dst, Operand src, uint8_t mask);

  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
    palignr(dst, Operand(src), mask);
  }
  void palignr(XMMRegister dst, Operand src, uint8_t mask);

  void pextrb(Register dst, XMMRegister src, uint8_t offset) {
    pextrb(Operand(dst), src, offset);
  }
  void pextrb(Operand dst, XMMRegister src, uint8_t offset);
  // SSE3 instructions
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src) { movddup(dst, Operand(src)); }
  void movshdup(XMMRegister dst, XMMRegister src);

  // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
  void pextrw(Register dst, XMMRegister src, uint8_t offset) {
    pextrw(Operand(dst), src, offset);
  }
  void pextrw(Operand dst, XMMRegister src, uint8_t offset);
  void pextrd(Register dst, XMMRegister src, uint8_t offset) {
    pextrd(Operand(dst), src, offset);
  }
  void pextrd(Operand dst, XMMRegister src, uint8_t offset);

  void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
    insertps(dst, Operand(src), offset);
  }
  void insertps(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
    pinsrb(dst, Operand(src), offset);
  }
  void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
    pinsrw(dst, Operand(src), offset);
  }
  void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
  void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
    pinsrd(dst, Operand(src), offset);
  }
  void pinsrd(XMMRegister dst, Operand src, uint8_t offset);

  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // AVX instructions
  void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vaddss(dst, src1, Operand(src2));
  }
  void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x58, dst, src1, src2);
  }
  void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsubss(dst, src1, Operand(src2));
  }
  void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5c, dst, src1, src2);
  }
  void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vmulss(dst, src1, Operand(src2));
  }
  void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x59, dst, src1, src2);
  }
  void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vdivss(dst, src1, Operand(src2));
  }
  void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5e, dst, src1, src2);
  }
  void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vmaxss(dst, src1, Operand(src2));
  }
  void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5f, dst, src1, src2);
  }
  void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vminss(dst, src1, Operand(src2));
  }
  void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x5d, dst, src1, src2);
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsqrtss(dst, src1, Operand(src2));
  }
  void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vss(0x51, dst, src1, src2);
  }
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vhaddps(dst, src1, Operand(src2));
  }
  void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsqrtpd(XMMRegister dst, XMMRegister src) { vsqrtpd(dst, Operand(src)); }
  void vsqrtpd(XMMRegister dst, Operand src) {
    vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovss(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vmovss(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vmovsd(Operand dst, XMMRegister src) {
    vinstr(0x11, src, xmm0, dst, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x10, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vmovsd(XMMRegister dst, Operand src) {
    vinstr(0x10, dst, xmm0, src, kF2, k0F, kWIG);
  }

  void vextractps(Operand dst, XMMRegister src, byte imm8);

  void vpcmpgtq(XMMRegister dst, XMMRegister src1, XMMRegister src2);

  void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
  void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufps(dst, src1, Operand(src2), imm8);
  }
  void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
  void vshufpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vshufpd(dst, src1, Operand(src2), imm8);
  }
  void vshufpd(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);

  void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);
  void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovhps(Operand dst, XMMRegister src);

  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsllq(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
  void vpsrlq(XMMRegister dst, XMMRegister src, uint8_t imm8);

  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufhw(dst, Operand(src), shuffle);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshuflw(dst, Operand(src), shuffle);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
    vpshufd(dst, Operand(src), shuffle);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);

  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);

  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpblendw(dst, src1, Operand(src2), mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vpalignr(dst, src1, Operand(src2), mask);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);

  void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
    vpextrb(Operand(dst), src, offset);
  }
  void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
    vpextrw(Operand(dst), src, offset);
  }
  void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
  void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
    vpextrd(Operand(dst), src, offset);
  }
  void vpextrd(Operand dst, XMMRegister src, uint8_t offset);

  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 uint8_t offset) {
    vinsertps(dst, src1, Operand(src2), offset);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
                 uint8_t offset);
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrb(dst, src1, Operand(src2), offset);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrw(dst, src1, Operand(src2), offset);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t offset) {
    vpinsrd(dst, src1, Operand(src2), offset);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);

  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode);
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtpd2ps(XMMRegister dst, XMMRegister src) {
    vinstr(0x5A, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vcvttps2dq(dst, Operand(src));
  }
  void vcvttps2dq(XMMRegister dst, Operand src) {
    vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvttpd2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0xE6, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }

  void vmovddup(XMMRegister dst, Operand src) {
    vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vmovddup(XMMRegister dst, XMMRegister src) {
    vmovddup(dst, Operand(src));
  }
  void vmovshdup(XMMRegister dst, XMMRegister src) {
    vinstr(0x16, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vbroadcastss(XMMRegister dst, XMMRegister src) {
    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0, AVX2);
  }
  void vbroadcastss(XMMRegister dst, Operand src) {
    vinstr(0x18, dst, xmm0, src, k66, k0F38, kW0);
  }
  void vmovdqa(XMMRegister dst, Operand src) {
    vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovdqa(XMMRegister dst, XMMRegister src) {
    vinstr(0x6F, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vmovdqu(XMMRegister dst, Operand src) {
    vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vmovdqu(Operand dst, XMMRegister src) {
    vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
  }
  void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
  void vmovd(XMMRegister dst, Operand src) {
    vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
  }
vmovd(Register dst,XMMRegister src)1330 void vmovd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
vmovd(Operand dst,XMMRegister src)1331 void vmovd(Operand dst, XMMRegister src) {
1332 vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
1333 }
1334
1335 void vmovmskpd(Register dst, XMMRegister src);
1336 void vmovmskps(Register dst, XMMRegister src);
1337
1338 void vpmovmskb(Register dst, XMMRegister src);
1339
vucomisd(XMMRegister dst,XMMRegister src)1340 void vucomisd(XMMRegister dst, XMMRegister src) {
1341 vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
1342 }
vucomisd(XMMRegister dst,Operand src)1343 void vucomisd(XMMRegister dst, Operand src) {
1344 vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
1345 }
vucomiss(XMMRegister dst,XMMRegister src)1346 void vucomiss(XMMRegister dst, XMMRegister src) {
1347 vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
1348 }
vucomiss(XMMRegister dst,Operand src)1349 void vucomiss(XMMRegister dst, Operand src) {
1350 vinstr(0x2E, dst, xmm0, src, kNoPrefix, k0F, kWIG);
1351 }
1352
  // BMI instructions
  void andn(Register dst, Register src1, Register src2) {
    andn(dst, src1, Operand(src2));
  }
  void andn(Register dst, Register src1, Operand src2) {
    bmi1(0xf2, dst, src1, src2);
  }
  void bextr(Register dst, Register src1, Register src2) {
    bextr(dst, Operand(src1), src2);
  }
  void bextr(Register dst, Operand src1, Register src2) {
    bmi1(0xf7, dst, src2, src1);
  }
  void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
  void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
  void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
  void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
  void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
  void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
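  // Note on the BLSI/BLSMSK/BLSR overloads above: the general-purpose register
  // passed as the second bmi1() argument (ebx, edx, ecx) is not a real
  // operand. Its register code (3, 2, 1) supplies the /n opcode-extension
  // field of the 0xf3 opcode, while the destination travels in VEX.vvvv.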
  void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
  void tzcnt(Register dst, Operand src);

  void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
  void lzcnt(Register dst, Operand src);

  void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
  void popcnt(Register dst, Operand src);

  void bzhi(Register dst, Register src1, Register src2) {
    bzhi(dst, Operand(src1), src2);
  }
  void bzhi(Register dst, Operand src1, Register src2) {
    bmi2(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulx(Register dst1, Register dst2, Register src) {
    mulx(dst1, dst2, Operand(src));
  }
  void mulx(Register dst1, Register dst2, Operand src) {
    bmi2(kF2, 0xf6, dst1, dst2, src);
  }
  void pdep(Register dst, Register src1, Register src2) {
    pdep(dst, src1, Operand(src2));
  }
  void pdep(Register dst, Register src1, Operand src2) {
    bmi2(kF2, 0xf5, dst, src1, src2);
  }
  void pext(Register dst, Register src1, Register src2) {
    pext(dst, src1, Operand(src2));
  }
  void pext(Register dst, Register src1, Operand src2) {
    bmi2(kF3, 0xf5, dst, src1, src2);
  }
  void sarx(Register dst, Register src1, Register src2) {
    sarx(dst, Operand(src1), src2);
  }
  void sarx(Register dst, Operand src1, Register src2) {
    bmi2(kF3, 0xf7, dst, src2, src1);
  }
  void shlx(Register dst, Register src1, Register src2) {
    shlx(dst, Operand(src1), src2);
  }
  void shlx(Register dst, Operand src1, Register src2) {
    bmi2(k66, 0xf7, dst, src2, src1);
  }
  void shrx(Register dst, Register src1, Register src2) {
    shrx(dst, Operand(src1), src2);
  }
  void shrx(Register dst, Operand src1, Register src2) {
    bmi2(kF2, 0xf7, dst, src2, src1);
  }
  void rorx(Register dst, Register src, byte imm8) {
    rorx(dst, Operand(src), imm8);
  }
  void rorx(Register dst, Operand src, byte imm8);

  // Implementation of packed single-precision floating-point SSE instructions.
  void ps(byte op, XMMRegister dst, Operand src);
  // Implementation of packed double-precision floating-point SSE instructions.
  void pd(byte op, XMMRegister dst, Operand src);

#define PACKED_OP_LIST(V) \
  V(unpckl, 0x14)         \
  V(and, 0x54)            \
  V(andn, 0x55)           \
  V(or, 0x56)             \
  V(xor, 0x57)            \
  V(add, 0x58)            \
  V(mul, 0x59)            \
  V(sub, 0x5c)            \
  V(min, 0x5d)            \
  V(div, 0x5e)            \
  V(max, 0x5f)

#define SSE_PACKED_OP_DECLARE(name, opcode)                             \
  void name##ps(XMMRegister dst, XMMRegister src) {                     \
    ps(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##ps(XMMRegister dst, Operand src) { ps(opcode, dst, src); } \
  void name##pd(XMMRegister dst, XMMRegister src) {                     \
    pd(opcode, dst, Operand(src));                                      \
  }                                                                     \
  void name##pd(XMMRegister dst, Operand src) { pd(opcode, dst, src); }

  PACKED_OP_LIST(SSE_PACKED_OP_DECLARE)
#undef SSE_PACKED_OP_DECLARE

#define AVX_PACKED_OP_DECLARE(name, opcode)                               \
  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(opcode, dst, src1, src2);                                         \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vpd(opcode, dst, src1, Operand(src2));                                \
  }                                                                       \
  void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vpd(opcode, dst, src1, src2);                                         \
  }

  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE)
#undef AVX_PACKED_OP_DECLARE
#undef PACKED_OP_LIST
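  // For reference: each PACKED_OP_LIST entry expands, via the two macros
  // above, into SSE and AVX overloads for both the ps and pd forms. The
  // (add, 0x58) entry, for example, yields roughly
  //
  //   void addps(XMMRegister dst, XMMRegister src);
  //   void addps(XMMRegister dst, Operand src);
  //   void vaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
  //   void vaddps(XMMRegister dst, XMMRegister src1, Operand src2);
  //
  // plus the matching addpd/vaddpd double-precision variants.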

  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);

#define AVX_CMP_P(instr, imm8)                                             \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmpps(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmppd(dst, src1, Operand(src2), imm8);                                \
  }                                                                        \
  void v##instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  PACKED_CMP_LIST(AVX_CMP_P)
  // vcmpgeps/vcmpgepd only in AVX.
  AVX_CMP_P(cmpge, 0xd)
#undef AVX_CMP_P
#undef PACKED_CMP_LIST

  // Other SSE and AVX instructions
#define DECLARE_SSE_UNOP_AND_AVX(instruction, escape, opcode)       \
  void instruction(XMMRegister dst, XMMRegister src) {              \
    instruction(dst, Operand(src));                                 \
  }                                                                 \
  void instruction(XMMRegister dst, Operand src) {                  \
    sse_instr(dst, src, 0x##escape, 0x##opcode);                    \
  }                                                                 \
  void v##instruction(XMMRegister dst, XMMRegister src) {           \
    v##instruction(dst, Operand(src));                              \
  }                                                                 \
  void v##instruction(XMMRegister dst, Operand src) {               \
    vinstr(0x##opcode, dst, xmm0, src, kNoPrefix, k##escape, kWIG); \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_UNOP_AND_AVX)
#undef DECLARE_SSE_UNOP_AND_AVX

#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    instruction(dst, Operand(src));                                   \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    v##instruction(dst, src1, Operand(src2));                                \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    instruction(dst, Operand(src));                                          \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    instruction(dst, Operand(src));                                         \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
#undef DECLARE_SSE4_INSTRUCTION

#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    v##instruction(dst, src1, Operand(src2));                                 \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

#define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
                                        opcode)                                \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    v##instruction(dst, Operand(src));                                         \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0);   \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
#undef DECLARE_SSE4_AVX_RM_INSTRUCTION

  // AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)           \
  void instr(XMMRegister dst, XMMRegister src) {                            \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }                                                                         \
  void instr(XMMRegister dst, Operand src) {                                \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION

#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    vinstr(0x##opcode, dst, src1, src2, k##length, k##prefix,           \
           k##escape1##escape2, k##extension, FMA3);                    \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA

  // Prefetch src position into cache level.
  // Level 1, 2 or 3 specifies the CPU cache level. Level 0 specifies a
  // non-temporal prefetch (minimizes cache pollution).
  void prefetch(Operand src, int level);
  // TODO(lrn): Need SFENCE for movnt?

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  // Writes a single byte, word, or doubleword of data into the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    dd(data, rmode);
  }
  void dd(Label* label);
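  // A minimal inline jump-table sketch using the helpers above (the label
  // names are illustrative, not part of this header):
  //
  //   Label table, do_a, do_b;
  //   bind(&table);
  //   dd(&do_a);  // one table entry per case label
  //   dd(&do_b);
  //   ...
  //   bind(&do_a);  // case bodies follow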

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const { return reloc_info_writer.pos() - pc_; }

  static bool IsNop(Address addr);

  int relocation_writer_size() {
    return (buffer_start_ + buffer_->size()) - reloc_info_writer.pos();
  }

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }

 protected:
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst, Register src);

  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }

 private:
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }

  // code emission
  void GrowBuffer();
  inline void emit(uint32_t x);
  inline void emit(Handle<HeapObject> handle);
  inline void emit(uint32_t x, RelocInfo::Mode rmode);
  inline void emit(Handle<Code> code, RelocInfo::Mode rmode);
  inline void emit(const Immediate& x);
  inline void emit_b(Immediate x);
  inline void emit_w(const Immediate& x);
  inline void emit_q(uint64_t x);

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // instruction generation
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
  // with a given destination expression and an immediate operand. It attempts
  // to use the shortest encoding possible.
  // sel specifies the /n in the modrm byte (see the Intel PRM).
  void emit_arith(int sel, Operand dst, const Immediate& x);
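  // (For reference, the /n extension selects the operation within the
  //  0x81/0x83 immediate group: 0=ADD, 1=OR, 2=ADC, 3=SBB, 4=AND, 5=SUB,
  //  6=XOR, 7=CMP.)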

  void emit_operand(int code, Operand adr);
  void emit_operand(Register reg, Operand adr);
  void emit_operand(XMMRegister reg, Operand adr);

  void emit_label(Label* label);

  void emit_farith(int b1, int b2, int i);

  // Emit vex prefix
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
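  // These enums correspond to the VEX prefix fields: SIMDPrefix is the implied
  // legacy prefix (pp), VectorLength the L bit (128- vs. 256-bit operation),
  // VexW the W bit, and LeadingOpcode the implied escape bytes (mm-mmm:
  // 0F, 0F 38 or 0F 3A).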
  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);

  // labels
  void print(const Label* L);
  void bind_to(Label* L, int pos);

  // displacements
  inline Displacement disp_at(Label* L);
  inline void disp_at_put(Label* L, Displacement disp);
  inline void emit_disp(Label* L, Displacement::Type type);
  inline void emit_near_disp(Label* L);

  void sse_instr(XMMRegister dst, Operand src, byte prefix, byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w,
              CpuFeature = AVX);
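  // In the vinstr() overloads, dst is encoded in the ModR/M reg field, src1 in
  // VEX.vvvv (callers pass xmm0 when the instruction has no second source
  // register), and src2 in the ModR/M r/m field.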
  // Most BMI instructions share the same VEX-encoded operand layout; these
  // helpers emit that common form.
  void bmi1(byte op, Register reg, Register vreg, Operand rm);
  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // record the position of jmp/jcc instruction
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class EnsureSpace;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Variables for this instance of assembler
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;
};

// Helper class that ensures that there is enough space for generating
// instructions and relocation information. The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* const assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
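
// Typical use, as a sketch of the convention rather than anything declared
// here: each Assembler emitter stack-allocates the guard before writing any
// bytes, so the buffer is grown up front if necessary.
//
//   void Assembler::SomeInstruction() {   // hypothetical emitter
//     EnsureSpace ensure_space(this);
//     // ... write the opcode and operand bytes ...
//   }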

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_IA32_ASSEMBLER_IA32_H_