// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

// A lightweight X64 Assembler.

#ifndef V8_CODEGEN_X64_ASSEMBLER_X64_H_
#define V8_CODEGEN_X64_ASSEMBLER_X64_H_

#include <deque>
#include <map>
#include <memory>
#include <vector>

#include "src/base/export-template.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/label.h"
#include "src/codegen/x64/constants-x64.h"
#include "src/codegen/x64/fma-instr.h"
#include "src/codegen/x64/register-x64.h"
#include "src/codegen/x64/sse-instr.h"
#include "src/objects/smi.h"
#if defined(V8_OS_WIN_X64)
#include "src/diagnostics/unwinding-info-win64.h"
#endif

namespace v8 {
namespace internal {

class SafepointTableBuilder;

// Utility functions

enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always = 16,
  never = 17,
  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,
  last_condition = greater
};

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
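// For example (illustrative): NegateCondition(equal) yields not_equal
// (4 ^ 1 == 5) and NegateCondition(below) yields above_equal (2 ^ 1 == 3),
// because the x64 condition codes are laid out in complementary pairs.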
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}

enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};

// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate {
 public:
  explicit constexpr Immediate(int32_t value) : value_(value) {}
  explicit constexpr Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit Immediate(Smi value)
      : value_(static_cast<int32_t>(static_cast<intptr_t>(value.ptr()))) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
  }

  int32_t value() const { return value_; }
  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  const int32_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;

  friend class Assembler;
};
ASSERT_TRIVIALLY_COPYABLE(Immediate);
static_assert(sizeof(Immediate) <= kSystemPointerSize,
              "Immediate must be small enough to pass it by value");

class Immediate64 {
 public:
  explicit constexpr Immediate64(int64_t value) : value_(value) {}
  explicit constexpr Immediate64(int64_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit constexpr Immediate64(Address value, RelocInfo::Mode rmode)
      : value_(static_cast<int64_t>(value)), rmode_(rmode) {}

 private:
  const int64_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NO_INFO;

  friend class Assembler;
};

// -----------------------------------------------------------------------------
// Machine instruction Operands

enum ScaleFactor : int8_t {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  times_half_system_pointer_size = times_4,
  times_system_pointer_size = times_8,
  times_tagged_size = (kTaggedSize == 8) ? times_8 : times_4,
};
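// Typical memory operands built with the Operand constructors below
// (illustrative):
//   Operand(rbx, 12)                // [rbx + 12]
//   Operand(rbx, rcx, times_4, 12)  // [rbx + rcx*4 + 12]
//   Operand(rcx, times_8, 0)        // [rcx*8]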

class V8_EXPORT_PRIVATE Operand {
 public:
  struct Data {
    byte rex = 0;
    byte buf[9];
    byte len = 1;   // Number of bytes of buf in use.
    int8_t addend;  // For rip + offset + addend.
  };

  // [base + disp/r]
  V8_INLINE Operand(Register base, int32_t disp) {
    if (base == rsp || base == r12) {
      // SIB byte is needed to encode (rsp + offset) or (r12 + offset).
      set_sib(times_1, rsp, base);
    }

    if (disp == 0 && base != rbp && base != r13) {
      set_modrm(0, base);
    } else if (is_int8(disp)) {
      set_modrm(1, base);
      set_disp8(disp);
    } else {
      set_modrm(2, base);
      set_disp32(disp);
    }
  }

  // [base + index*scale + disp/r]
  V8_INLINE Operand(Register base, Register index, ScaleFactor scale,
                    int32_t disp) {
    DCHECK(index != rsp);
    set_sib(scale, index, base);
    if (disp == 0 && base != rbp && base != r13) {
      // This call to set_modrm doesn't overwrite the REX.B (or REX.X) bits
      // possibly set by set_sib.
      set_modrm(0, rsp);
    } else if (is_int8(disp)) {
      set_modrm(1, rsp);
      set_disp8(disp);
    } else {
      set_modrm(2, rsp);
      set_disp32(disp);
    }
  }

  // [index*scale + disp/r]
  V8_INLINE Operand(Register index, ScaleFactor scale, int32_t disp) {
    DCHECK(index != rsp);
    set_modrm(0, rsp);
    set_sib(scale, index, rbp);
    set_disp32(disp);
  }
  // Offset from an existing memory operand.
  // The offset is added to the existing displacement as 32-bit signed values;
  // the sum must not overflow.
  Operand(Operand base, int32_t offset);

  // [rip + disp/r]
  V8_INLINE explicit Operand(Label* label, int addend = 0) {
    data_.addend = addend;
    DCHECK_NOT_NULL(label);
    DCHECK(addend == 0 || (is_int8(addend) && label->is_bound()));
    set_modrm(0, rbp);
    set_disp64(reinterpret_cast<intptr_t>(label));
  }

  Operand(const Operand&) V8_NOEXCEPT = default;
  Operand& operator=(const Operand&) V8_NOEXCEPT = default;

  const Data& data() const { return data_; }

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

 private:
  V8_INLINE void set_modrm(int mod, Register rm_reg) {
    DCHECK(is_uint2(mod));
    data_.buf[0] = mod << 6 | rm_reg.low_bits();
    // Set REX.B to the high bit of rm.code().
    data_.rex |= rm_reg.high_bit();
  }

  V8_INLINE void set_sib(ScaleFactor scale, Register index, Register base) {
    DCHECK_EQ(data_.len, 1);
    DCHECK(is_uint2(scale));
    // Use SIB with no index register only for base rsp or r12. Otherwise we
    // would skip the SIB byte entirely.
    DCHECK(index != rsp || base == rsp || base == r12);
    data_.buf[1] = (scale << 6) | (index.low_bits() << 3) | base.low_bits();
    data_.rex |= index.high_bit() << 1 | base.high_bit();
    data_.len = 2;
  }

  V8_INLINE void set_disp8(int disp) {
    DCHECK(is_int8(disp));
    DCHECK(data_.len == 1 || data_.len == 2);
    int8_t* p = reinterpret_cast<int8_t*>(&data_.buf[data_.len]);
    *p = disp;
    data_.len += sizeof(int8_t);
  }

  V8_INLINE void set_disp32(int disp) {
    DCHECK(data_.len == 1 || data_.len == 2);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(int32_t);
  }

  V8_INLINE void set_disp64(int64_t disp) {
    DCHECK_EQ(1, data_.len);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(disp);
  }

  Data data_;
};
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");

#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)

// Shift instructions on operands/registers with kInt32Size and kInt64Size.
#define SHIFT_INSTRUCTION_LIST(V) \
  V(rol, 0x0)                     \
  V(ror, 0x1)                     \
  V(rcl, 0x2)                     \
  V(rcr, 0x3)                     \
  V(shl, 0x4)                     \
  V(shr, 0x5)                     \
  V(sar, 0x7)
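// Via DECLARE_SHIFT_INSTRUCTION further below, each entry expands into
// immediate and CL-count forms in both widths, e.g. (illustrative):
//   shlq(rax, Immediate(3));  // rax <<= 3
//   sarl_cl(rdx);             // edx >>= cl, arithmetic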

// Partial Constant Pool
// Unlike a complete constant pool (as used on ARM), a partial constant pool
// only takes effect for shareable constants, in order to reduce code size.
// A partial constant pool does not emit constant pool entries at the end of
// each code object. Instead, it keeps the first shareable constant inlined
// in the instructions and uses rip-relative memory loads for the same
// constant in subsequent instructions. These rip-relative memory loads
// target the position of the first inlined constant. For example:
//
// REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
// …
// REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
// …
//
// turns into
//
// REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
// …
// REX.W movq r10,[rip+0xffffff96] ; 7 bytes
// …

class ConstPool {
 public:
  explicit ConstPool(Assembler* assm) : assm_(assm) {}
  // Returns true when partial constant pool is valid for this entry.
  bool TryRecordEntry(intptr_t data, RelocInfo::Mode mode);
  bool IsEmpty() const { return entries_.empty(); }

  void PatchEntries();
  // Discard any pending pool entries.
  void Clear();

 private:
  // Adds a shared entry to entries_. Returns true if this is not the first
  // time we add this entry, false otherwise.
  bool AddSharedEntry(uint64_t data, int offset);

  // Check if the instruction is a rip-relative move.
  bool IsMoveRipRelative(Address instr);

  Assembler* assm_;

  // Values, pc offsets of entries.
  using EntryMap = std::multimap<uint64_t, int>;
  EntryMap entries_;

  // Number of bytes taken up by the displacement of rip-relative addressing.
  static constexpr int kRipRelativeDispSize = 4;  // 32-bit displacement.
  // Distance between the address of the displacement in the rip-relative move
  // instruction and the head address of the instruction.
  static constexpr int kMoveRipRelativeDispOffset =
      3;  // REX Opcode ModRM Displacement
  // Distance between the address of the imm64 in the 'movq reg, imm64'
  // instruction and the head address of the instruction.
  static constexpr int kMoveImm64Offset = 2;  // REX Opcode imm64
  // A mask for rip-relative move instruction.
  static constexpr uint32_t kMoveRipRelativeMask = 0x00C7FFFB;
  // The bits for a rip-relative move instruction after mask.
  static constexpr uint32_t kMoveRipRelativeInstr = 0x00058B48;
};

class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static constexpr int kGap = 32;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is nullptr, the assembler allocates and grows its
  // own buffer. Otherwise it takes ownership of the provided buffer.
  explicit Assembler(const AssemblerOptions&,
                     std::unique_ptr<AssemblerBuffer> = {});
  ~Assembler() override = default;

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc.
  static constexpr int kNoHandlerTable = 0;
  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
  void GetCode(Isolate* isolate, CodeDesc* desc,
               SafepointTableBuilder* safepoint_table_builder,
               int handler_table_offset);

  // Convenience wrapper for code without safepoint or handler tables.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }

  void FinalizeJumpOptimizationInfo();

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  // The isolate argument is unused (and may be nullptr) when skipping flushing.
  static inline Address target_address_at(Address pc, Address constant_pool);
  static inline void set_target_address_at(
      Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
  static inline int32_t relative_target_offset(Address target, Address pc);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code code, Address target);

  // Get the size of the special target encoded at 'instruction_payload'.
  inline static int deserialization_special_target_size(
      Address instruction_payload);

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  inline Handle<CodeT> code_target_object_handle_at(Address pc);
  inline Handle<HeapObject> compressed_embedded_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);

  // Number of bytes taken up by the branch target in the code.
  static constexpr int kSpecialTargetSize = 4;  // 32-bit displacement.

  // One byte opcode for test eax,0xXXXXXXXX.
  static constexpr byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static constexpr byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static constexpr byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static constexpr byte kJccShortPrefix = 0x70;
  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
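  // Each short-jcc opcode is the 0x70 prefix OR'ed with the Condition value,
  // e.g. kJzShortOpcode == 0x74 (0x70 | zero), the one-byte encoding of jz.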

  // VEX prefix encodings.
  enum SIMDPrefix { kNoPrefix = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };

  // ---------------------------------------------------------------------------
  // Code generation
  //
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  // - Instructions on operands/registers with pointer size use 'p'.
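  //
  // For example (illustrative), the DECLARE_INSTRUCTION macro below turns the
  // list entry V(mov) into movl(...) and movq(...):
  //   movl(rax, Immediate(1));  // 32-bit move, zero-extends to 64 bits.
  //   movq(rax, Immediate(1));  // 64-bit move.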

#define DECLARE_INSTRUCTION(instruction)    \
  template <typename... Ps>                 \
  void instruction##_tagged(Ps... ps) {     \
    emit_##instruction(ps..., kTaggedSize); \
  }                                         \
                                            \
  template <typename... Ps>                 \
  void instruction##l(Ps... ps) {           \
    emit_##instruction(ps..., kInt32Size);  \
  }                                         \
                                            \
  template <typename... Ps>                 \
  void instruction##q(Ps... ps) {           \
    emit_##instruction(ps..., kInt64Size);  \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the
  // platform.
  void CodeTargetAlign();
  void LoopHeaderAlign();

  // Stack
  void pushfq();
  void popfq();

  void pushq(Immediate value);
  // Push a 32-bit integer and guarantee that it is actually pushed as a
  // 32-bit value; the normal push will optimize the 8-bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(Operand src);

  void popq(Register dst);
  void popq(Operand dst);

  void incsspq(Register number_of_words);

  void leave();

  // Moves
  void movb(Register dst, Operand src);
  void movb(Register dst, Immediate imm);
  void movb(Operand dst, Register src);
  void movb(Operand dst, Immediate imm);

  // Move the low 16 bits of a 64-bit register value to a 16-bit
  // memory location.
  void movw(Register dst, Operand src);
  void movw(Operand dst, Register src);
  void movw(Operand dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(Operand dst, Label* src);

  // Load a heap number into a register.
  // The heap number will not be allocated and embedded into the code right
  // away. Instead, we emit the load of a dummy object. Later, when calling
  // Assembler::GetCode, the heap number will be allocated and the code will be
  // patched by replacing the dummy with the actual object. The RelocInfo for
  // the embedded object is already recorded correctly when emitting the dummy
  // move.
  void movq_heap_number(Register dst, double value);

  void movq_string(Register dst, const StringConstantBase* str);

  // Loads a 64-bit immediate into a register, potentially using the constant
  // pool.
  void movq(Register dst, int64_t value) { movq(dst, Immediate64(value)); }
  void movq(Register dst, uint64_t value) {
    movq(dst, Immediate64(static_cast<int64_t>(value)));
  }

  // Loads a 64-bit immediate into a register without using the constant pool.
  void movq_imm64(Register dst, int64_t value);

  void movsxbl(Register dst, Register src);
  void movsxbl(Register dst, Operand src);
  void movsxbq(Register dst, Register src);
  void movsxbq(Register dst, Operand src);
  void movsxwl(Register dst, Register src);
  void movsxwl(Register dst, Operand src);
  void movsxwq(Register dst, Register src);
  void movsxwq(Register dst, Operand src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, Operand src);

  // Repeated moves.
  void repmovsb();
  void repmovsw();
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }
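  // The repmovs{b,w,l,q} family above emits rep movs of the corresponding
  // width, copying rcx elements from [rsi] to [rdi] (assuming the direction
  // flag is clear).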

  // Repeated store of doublewords (fill RCX doublewords at [RDI] with EAX).
  void repstosl();
  // Repeated store of quadwords (fill RCX quadwords at [RDI] with RAX).
  void repstosq();

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(Address value, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Operand src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Operand src);

  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Register dst, Operand src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Operand dst, Register src) { arithmetic_op_8(0x38, src, dst); }

  void cmpb(Operand dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpw(Operand dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Operand src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Register dst, Register src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Operand dst, Register src) { arithmetic_op_16(0x39, src, dst); }

  void testb(Register reg, Operand op) { testb(op, reg); }

  void testw(Register reg, Operand op) { testw(op, reg); }

  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }

  void decb(Register dst);
  void decb(Operand dst);

  // Lock prefix.
  void lock();

  void xchgb(Register reg, Operand op);
  void xchgw(Register reg, Operand op);

  void xaddb(Operand dst, Register src);
  void xaddw(Operand dst, Register src);
  void xaddl(Operand dst, Register src);
  void xaddq(Operand dst, Register src);

  void negb(Register reg);
  void negw(Register reg);
  void negl(Register reg);
  void negq(Register reg);
  void negb(Operand op);
  void negw(Operand op);
  void negl(Operand op);
  void negq(Operand op);

  void cmpxchgb(Operand dst, Register src);
  void cmpxchgw(Operand dst, Register src);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply eax by src, put the result in edx:eax.
  void mull(Register src);
  void mull(Operand src);
  // Multiply rax by src, put the result in rdx:rax.
  void mulq(Register src);

#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                     \
  void instruction##l(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
                                                                            \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
                                                                            \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }  \
                                                                            \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION

  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  void store_rax(Address dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  void sub_sp_32(uint32_t imm);

  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(Operand op, Immediate mask);
  void testb(Operand op, Register reg);

  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(Operand op, Immediate mask);
  void testw(Operand op, Register reg);

  // Bit operations.
  void bswapl(Register dst);
  void bswapq(Register dst);
  void btq(Operand dst, Register src);
  void btsq(Operand dst, Register src);
  void btsq(Register dst, Immediate imm8);
  void btrq(Register dst, Immediate imm8);
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, Operand src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, Operand src);
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, Operand src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, Operand src);

  // Miscellaneous
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
  void ud2();
  void setcc(Condition cc, Register reg);

  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask);
  void palignr(XMMRegister dst, Operand src, uint8_t mask);
  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask);

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  // Explicitly emit a near call / near jump. The displacement is relative to
  // the next instruction (which starts at {pc_offset() + kNearJmpInstrSize}).
  static constexpr int kNearJmpInstrSize = 5;
  void near_call(intptr_t disp, RelocInfo::Mode rmode);
  void near_jmp(intptr_t disp, RelocInfo::Mode rmode);

  void call(Handle<CodeT> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Handle<CodeT> target, RelocInfo::Mode rmode);
  void jmp(Address entry, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(Operand src);

  // Unconditional jump relative to the current address. Low-level routine,
  // use with caution!
  void jmp_rel(int offset);

  // Conditional jumps
  void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<CodeT> target, RelocInfo::Mode rmode);

  // Floating-point operations
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(Operand adr);
  void fld_d(Operand adr);

  void fstp_s(Operand adr);
  void fstp_d(Operand adr);
  void fstp(int index);

  void fild_s(Operand adr);
  void fild_d(Operand adr);

  void fist_s(Operand adr);

  void fistp_s(Operand adr);
  void fistp_d(Operand adr);

  void fisttp_s(Operand adr);
  void fisttp_d(Operand adr);

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(Operand adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();

  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src);
  void movaps(XMMRegister dst, Operand src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);

  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);

  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, Operand src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  void movmskps(Register dst, XMMRegister src);

  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w, CpuFeature feature = AVX);

  template <typename Reg1, typename Reg2, typename Op>
  void vinstr(byte op, Reg1 dst, Reg2 src1, Op src2, SIMDPrefix pp,
              LeadingOpcode m, VexW w, CpuFeature feature = AVX2);

  // SSE instructions
  void sse_instr(XMMRegister dst, XMMRegister src, byte escape, byte opcode);
  void sse_instr(XMMRegister dst, Operand src, byte escape, byte opcode);
#define DECLARE_SSE_INSTRUCTION(instruction, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {       \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }                                                          \
  void instruction(XMMRegister dst, Operand src) {           \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
  SSE_BINOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
#undef DECLARE_SSE_INSTRUCTION
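  // For illustration (the actual entries live in src/codegen/x64/sse-instr.h):
  // a binop list entry such as V(addps, 0F, 58) would expand to
  // addps(XMMRegister, XMMRegister) and addps(XMMRegister, Operand), emitting
  // the raw SSE encoding 0F 58 /r.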

  // SSE instructions with prefix and SSE2 instructions
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  // These SSE instructions have the same encoding as the SSE2 instructions.
  SSE_INSTRUCTION_LIST_SS(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

  void sse2_instr(XMMRegister reg, byte imm8, byte prefix, byte escape,
                  byte opcode, int extension) {
    XMMRegister ext_reg = XMMRegister::from_code(extension);
    sse2_instr(ext_reg, reg, prefix, escape, opcode);
    emit(imm8);
  }

#define DECLARE_SSE2_SHIFT_IMM(instruction, prefix, escape, opcode, extension) \
  void instruction(XMMRegister reg, byte imm8) {                               \
    sse2_instr(reg, imm8, 0x##prefix, 0x##escape, 0x##opcode, 0x##extension);  \
  }
  SSE2_INSTRUCTION_LIST_SHIFT_IMM(DECLARE_SSE2_SHIFT_IMM)
#undef DECLARE_SSE2_SHIFT_IMM

#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

#define DECLARE_SSE2_PD_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX);     \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX);     \
  }

  SSE2_INSTRUCTION_LIST_PD(DECLARE_SSE2_PD_AVX_INSTRUCTION)
#undef DECLARE_SSE2_PD_AVX_INSTRUCTION

#define DECLARE_SSE2_PI_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }

  SSE2_INSTRUCTION_LIST_PI(DECLARE_SSE2_PI_AVX_INSTRUCTION)
#undef DECLARE_SSE2_PI_AVX_INSTRUCTION

#define DECLARE_SSE2_SHIFT_AVX_INSTRUCTION(instruction, prefix, escape,      \
                                           opcode)                           \
  DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)          \
  void v##instruction(YMMRegister dst, YMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0, AVX2);    \
  }

  SSE2_INSTRUCTION_LIST_SHIFT(DECLARE_SSE2_SHIFT_AVX_INSTRUCTION)
#undef DECLARE_SSE2_SHIFT_AVX_INSTRUCTION
#undef DECLARE_SSE2_AVX_INSTRUCTION

#define DECLARE_SSE2_UNOP_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }

  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSE2_UNOP_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src);
  void movshdup(XMMRegister dst, XMMRegister src);

  // SSSE3
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  void sse4_instr(Register dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(Operand dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, Register src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
#undef DECLARE_SSE4_INSTRUCTION

#define DECLARE_SSE4_EXTRACT_INSTRUCTION(instruction, prefix, escape1,     \
                                         escape2, opcode)                  \
  void instruction(Register dst, XMMRegister src, uint8_t imm8) {          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }                                                                        \
  void instruction(Operand dst, XMMRegister src, uint8_t imm8) {           \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_SSE4_EXTRACT_INSTRUCTION)
#undef DECLARE_SSE4_EXTRACT_INSTRUCTION

  // SSE4.2
  void sse4_2_instr(XMMRegister dst, XMMRegister src, byte prefix,
                    byte escape1, byte escape2, byte opcode);
  void sse4_2_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                    byte escape2, byte opcode);
#define DECLARE_SSE4_2_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                   opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                        \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                           \
  void instruction(XMMRegister dst, Operand src) {                            \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE4_2_INSTRUCTION)
#undef DECLARE_SSE4_2_INSTRUCTION

#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(YMMRegister dst, YMMRegister src1, YMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0,  \
           AVX2);                                                             \
  }                                                                           \
  void v##instruction(YMMRegister dst, YMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0,  \
           AVX2);                                                             \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

#define DECLARE_SSSE3_UNOP_AVX_INSTRUCTION(instruction, prefix, escape1,     \
                                           escape2, opcode)                  \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(YMMRegister dst, YMMRegister src) {                    \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(YMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION

  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vpblendvb(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0, AVX2);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vblendvps(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0, AVX);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }
  void vblendvpd(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                 YMMRegister mask) {
    vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0, AVX);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

#define DECLARE_SSE4_PMOV_AVX_INSTRUCTION(instruction, prefix, escape1,      \
                                          escape2, opcode)                   \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_PMOV_AVX_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX_INSTRUCTION

#define DECLARE_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,        \
                                opcode)                                       \
  void v##instruction(Register dst, XMMRegister src, uint8_t imm8) {          \
    XMMRegister idst = XMMRegister::from_code(dst.code());                    \
    vinstr(0x##opcode, src, xmm0, idst, k##prefix, k##escape1##escape2, kW0); \
    emit(imm8);                                                               \
  }                                                                           \
  void v##instruction(Operand dst, XMMRegister src, uint8_t imm8) {           \
    vinstr(0x##opcode, src, xmm0, dst, k##prefix, k##escape1##escape2, kW0);  \
    emit(imm8);                                                               \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_AVX_INSTRUCTION)
#undef DECLARE_AVX_INSTRUCTION

  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(XMMRegister dst, Operand src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, Operand src);

  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, Operand src);
  void movdqa(XMMRegister dst, XMMRegister src);

  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(XMMRegister dst, XMMRegister src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, Operand src);
  void movupd(Operand dst, XMMRegister src);

  void cvtdq2pd(XMMRegister dst, XMMRegister src);

  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, Operand src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, Operand src);
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttps2dq(XMMRegister dst, XMMRegister src);

  void cvtlsi2sd(XMMRegister dst, Operand src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, Operand src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, Operand src);
  void cvtqsi2sd(XMMRegister dst, Register src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  void haddps(XMMRegister dst, XMMRegister src);
  void haddps(XMMRegister dst, Operand src);

  void cmpeqsd(XMMRegister dst, XMMRegister src);
  void cmpeqss(XMMRegister dst, XMMRegister src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  void pinsrw(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrw(XMMRegister dst, Operand src, uint8_t imm8);

  // SSE4.1 instructions
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void insertps(XMMRegister dst, Operand src, byte imm8);
  void pextrq(Register dst, XMMRegister src, int8_t imm8);
  void pinsrb(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrb(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Operand src, uint8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundss(XMMRegister dst, Operand src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, Operand src, RoundingMode mode);
  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, Operand src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, Operand src, int8_t cmp);

#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }     \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0)
  SSE_CMP_P(cmplt, 0x1)
  SSE_CMP_P(cmple, 0x2)
  SSE_CMP_P(cmpunord, 0x3)
  SSE_CMP_P(cmpneq, 0x4)
  SSE_CMP_P(cmpnlt, 0x5)
  SSE_CMP_P(cmpnle, 0x6)

#undef SSE_CMP_P
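  // For example (illustrative): cmpeqps(xmm0, xmm1) sets each lane of xmm0 to
  // all ones where it equals the corresponding lane of xmm1, and to zero
  // elsewhere.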

  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);

  void movhlps(XMMRegister dst, XMMRegister src) {
    sse_instr(dst, src, 0x0F, 0x12);
  }
  void movlhps(XMMRegister dst, XMMRegister src) {
    sse_instr(dst, src, 0x0F, 0x16);
  }

  // AVX instructions
1360 void vmovddup(XMMRegister dst, XMMRegister src);
1361 void vmovddup(XMMRegister dst, Operand src);
1362 void vmovddup(YMMRegister dst, YMMRegister src);
1363 void vmovddup(YMMRegister dst, Operand src);
1364 void vmovshdup(XMMRegister dst, XMMRegister src);
1365 void vmovshdup(YMMRegister dst, YMMRegister src);
1366 void vbroadcastss(XMMRegister dst, Operand src);
1367 void vbroadcastss(XMMRegister dst, XMMRegister src);
1368 void vbroadcastss(YMMRegister dst, Operand src);
1369 void vbroadcastss(YMMRegister dst, XMMRegister src);
1370
1371 void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
1372 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
1373 void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
1374 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
1375
1376 #define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
1377 void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1378 fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix, \
1379 k##escape1##escape2, k##extension); \
1380 } \
1381 void instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
1382 fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix, \
1383 k##escape1##escape2, k##extension); \
1384 }
1385 FMA_INSTRUCTION_LIST(FMA)
1386 #undef FMA

  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, Operand src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, Operand src);
  void vmovq(Register dst, XMMRegister src);

  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
  void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
  void vmovdqa(XMMRegister dst, Operand src);
  void vmovdqa(XMMRegister dst, XMMRegister src);
  void vmovdqa(YMMRegister dst, Operand src);
  void vmovdqa(YMMRegister dst, YMMRegister src);
  void vmovdqu(XMMRegister dst, Operand src);
  void vmovdqu(Operand dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(YMMRegister dst, Operand src);
  void vmovdqu(Operand dst, YMMRegister src);
  void vmovdqu(YMMRegister dst, YMMRegister src);

  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);

  void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovhps(Operand dst, XMMRegister src);

#define AVX_SSE_UNOP(instr, escape, opcode)          \
  void v##instr(XMMRegister dst, XMMRegister src2) { \
    vps(0x##opcode, dst, xmm0, src2);                \
  }                                                  \
  void v##instr(XMMRegister dst, Operand src2) {     \
    vps(0x##opcode, dst, xmm0, src2);                \
  }                                                  \
  void v##instr(YMMRegister dst, YMMRegister src2) { \
    vps(0x##opcode, dst, ymm0, src2);                \
  }                                                  \
  void v##instr(YMMRegister dst, Operand src2) {     \
    vps(0x##opcode, dst, ymm0, src2);                \
  }
  SSE_UNOP_INSTRUCTION_LIST(AVX_SSE_UNOP)
#undef AVX_SSE_UNOP

#define AVX_SSE_BINOP(instr, escape, opcode)                           \
  void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
    vps(0x##opcode, dst, src1, src2);                                  \
  }                                                                    \
  void v##instr(YMMRegister dst, YMMRegister src1, Operand src2) {     \
    vps(0x##opcode, dst, src1, src2);                                  \
  }
  SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)
#undef AVX_SSE_BINOP

#define AVX_3(instr, opcode, impl, SIMDRegister)                       \
  void instr(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(SIMDRegister dst, SIMDRegister src1, Operand src2) {      \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_3(vhaddps, 0x7c, vsd, XMMRegister)
  AVX_3(vhaddps, 0x7c, vsd, YMMRegister)

#define AVX_SCALAR(instr, prefix, escape, opcode)                      \
  void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG);   \
  }                                                                    \
  void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG);   \
  }
  SSE_INSTRUCTION_LIST_SS(AVX_SCALAR)
  SSE2_INSTRUCTION_LIST_SD(AVX_SCALAR)
#undef AVX_SCALAR

#undef AVX_3

#define AVX_SSE2_SHIFT_IMM(instr, prefix, escape, opcode, extension)   \
  void v##instr(XMMRegister dst, XMMRegister src, byte imm8) {         \
    XMMRegister ext_reg = XMMRegister::from_code(extension);           \
    vinstr(0x##opcode, ext_reg, dst, src, k##prefix, k##escape, kWIG); \
    emit(imm8);                                                        \
  }
  SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
#undef AVX_SSE2_SHIFT_IMM

  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x16, dst, src1, src2, kNoPrefix, k0F, kWIG);
  }
  void vmovhlps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x12, dst, src1, src2, kNoPrefix, k0F, kWIG);
  }
  void vcvtdq2pd(XMMRegister dst, XMMRegister src) {
    vinstr(0xe6, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0x5b, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundss(XMMRegister dst, XMMRegister src1, Operand src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, Operand src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundps(YMMRegister dst, YMMRegister src, RoundingMode mode) {
    vinstr(0x08, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x09, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundpd(YMMRegister dst, YMMRegister src, RoundingMode mode) {
    vinstr(0x09, dst, ymm0, src, k66, k0F3A, kWIG, AVX);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
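  // In the ROUNDSS/ROUNDSD/ROUNDPS/ROUNDPD immediate, bits 1:0 select the
  // rounding mode and bit 3 (the 0x8 ORed in above) suppresses the
  // precision (inexact) exception.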

  template <typename Reg, typename Op>
  void vsd(byte op, Reg dst, Reg src1, Op src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG, AVX);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, Operand src) { vss(0x10, dst, xmm0, src); }
  void vmovss(Operand dst, XMMRegister src) { vss(0x11, src, xmm0, dst); }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, Operand src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
    vps(0xC6, dst, src1, src2, imm8);
  }
  void vshufps(YMMRegister dst, YMMRegister src1, YMMRegister src2, byte imm8) {
    vps(0xC6, dst, src1, src2, imm8);
  }

  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovaps(YMMRegister dst, YMMRegister src) { vps(0x28, dst, ymm0, src); }
  void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
  void vmovaps(YMMRegister dst, Operand src) { vps(0x28, dst, ymm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(YMMRegister dst, YMMRegister src) { vps(0x10, dst, ymm0, src); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovups(YMMRegister dst, Operand src) { vps(0x10, dst, ymm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovups(Operand dst, YMMRegister src) { vps(0x11, src, ymm0, dst); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovapd(YMMRegister dst, YMMRegister src) { vpd(0x28, dst, ymm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vmovupd(YMMRegister dst, Operand src) { vpd(0x10, dst, ymm0, src); }
  void vmovupd(Operand dst, XMMRegister src) { vpd(0x11, src, xmm0, dst); }
  void vmovupd(Operand dst, YMMRegister src) { vpd(0x11, src, ymm0, dst); }
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vpd(0x50, idst, xmm0, src);
  }
  void vpmovmskb(Register dst, XMMRegister src);
  void vcmpeqss(XMMRegister dst, XMMRegister src) {
    vss(0xC2, dst, dst, src);
    emit(0x00);  // EQ == 0
  }
  void vcmpeqsd(XMMRegister dst, XMMRegister src) {
    vsd(0xC2, dst, dst, src);
    emit(0x00);  // EQ == 0
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
#define AVX_CMP_P(instr, imm8, SIMDRegister)                                \
  void instr##ps(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    vcmpps(dst, src1, src2, imm8);                                          \
  }                                                                         \
  void instr##ps(SIMDRegister dst, SIMDRegister src1, Operand src2) {       \
    vcmpps(dst, src1, src2, imm8);                                          \
  }                                                                         \
  void instr##pd(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
    vcmppd(dst, src1, src2, imm8);                                          \
  }                                                                         \
  void instr##pd(SIMDRegister dst, SIMDRegister src1, Operand src2) {       \
    vcmppd(dst, src1, src2, imm8);                                          \
  }

  AVX_CMP_P(vcmpeq, 0x0, XMMRegister)
  AVX_CMP_P(vcmpeq, 0x0, YMMRegister)
  AVX_CMP_P(vcmplt, 0x1, XMMRegister)
  AVX_CMP_P(vcmplt, 0x1, YMMRegister)
  AVX_CMP_P(vcmple, 0x2, XMMRegister)
  AVX_CMP_P(vcmple, 0x2, YMMRegister)
  AVX_CMP_P(vcmpunord, 0x3, XMMRegister)
  AVX_CMP_P(vcmpunord, 0x3, YMMRegister)
  AVX_CMP_P(vcmpneq, 0x4, XMMRegister)
  AVX_CMP_P(vcmpneq, 0x4, YMMRegister)
  AVX_CMP_P(vcmpnlt, 0x5, XMMRegister)
  AVX_CMP_P(vcmpnlt, 0x5, YMMRegister)
  AVX_CMP_P(vcmpnle, 0x6, XMMRegister)
  AVX_CMP_P(vcmpnle, 0x6, YMMRegister)
  AVX_CMP_P(vcmpge, 0xd, XMMRegister)
  AVX_CMP_P(vcmpge, 0xd, YMMRegister)

#undef AVX_CMP_P

  void vlddqu(XMMRegister dst, Operand src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpextrq(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW1);
    emit(imm8);
  }

  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(YMMRegister dst, YMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(YMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, ymm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }

  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(YMMRegister dst, YMMRegister src1, YMMRegister src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(YMMRegister dst, YMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }

  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
           byte imm8);
  void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2,
           byte imm8);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vpd(byte op, YMMRegister dst, YMMRegister src1, Operand src2);

  // AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode)           \
  template <typename Reg, typename Op>                                      \
  void instr(Reg dst, Op src) {                                             \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0, \
           AVX2);                                                           \
  }
  AVX2_BROADCAST_LIST(AVX2_INSTRUCTION)
#undef AVX2_INSTRUCTION

  // BMI instructions
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, Operand src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Operand src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, Operand src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Operand src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) { bmi1q(0xf3, rbx, dst, src); }
  void blsiq(Register dst, Operand src) { bmi1q(0xf3, rbx, dst, src); }
  void blsil(Register dst, Register src) { bmi1l(0xf3, rbx, dst, src); }
  void blsil(Register dst, Operand src) { bmi1l(0xf3, rbx, dst, src); }
  void blsmskq(Register dst, Register src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskq(Register dst, Operand src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Register src) { bmi1l(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Operand src) { bmi1l(0xf3, rdx, dst, src); }
  void blsrq(Register dst, Register src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrq(Register dst, Operand src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Register src) { bmi1l(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Operand src) { bmi1l(0xf3, rcx, dst, src); }
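  // In blsiq/blsmskq/blsrq (and the 32-bit variants) above, the fixed
  // register passed as the first bmi1 argument is not a real operand: it
  // supplies the ModR/M reg-field opcode extension of 0xf3
  // (blsi = /3 = rbx, blsmsk = /2 = rdx, blsr = /1 = rcx).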
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, Operand src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, Operand src);

  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, Operand src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, Operand src);

  void popcntq(Register dst, Register src);
  void popcntq(Register dst, Operand src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, Operand src);

  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, Operand src1, Register src2) {
    bmi2q(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Operand src1, Register src2) {
    bmi2l(kNoPrefix, 0xf5, dst, src2, src1);
  }
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, Operand src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Operand src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, Operand src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Operand src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Operand src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Operand src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Operand src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Operand src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, Operand src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, Operand src, byte imm8);

  void mfence();
  void lfence();
  void pause();

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  // Write raw data into the code stream: a byte (db), 32-bit word (dd),
  // 64-bit word (dq), or pointer-sized word (dp).
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO);
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NO_INFO) {
    dq(data, rmode);
  }
  void dq(Label* label);
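  // Sketch of an inline jump table emitted with dq(Label*); the labels and
  // the dispatch sequence are illustrative only, not part of this interface:
  //   Label table, case0, case1;
  //   ... compute the index and jump through the table ...
  //   bind(&table);
  //   dq(&case0);  // entry 0
  //   dq(&case1);  // entry 1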

  // Patch entries for partial constant pool.
  void PatchConstPool();

  // Check whether the partial constant pool should be used for this rmode.
  static bool UseConstPoolFor(RelocInfo::Mode rmode);

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }

#if defined(V8_OS_WIN_X64)
  win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
#endif

 protected:
  // Call near indirect
  void call(Operand operand);

 private:
  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }

  // code emission
  void GrowBuffer();

  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  inline void emit(Immediate x);
  inline void emit(Immediate64 x);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);
  inline void emit_rex_64(XMMRegister reg, XMMRegister rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is set.
  inline void emit_rex_64(Register reg, Operand op);
  inline void emit_rex_64(XMMRegister reg, Operand op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R is clear.
  inline void emit_rex_64(Operand op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }
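  // REX layout reminder: the prefix byte is 0b0100WRXB, i.e.
  //   0x40 | W << 3 | R << 2 | X << 1 | B,
  // so with only REX.W set it is the 0x48 emitted above.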

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared.
  inline void emit_rex_32(Register reg, Operand op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(Operand op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared. If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, Operand op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Operand), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Operand op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(Operand) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(Operand op);

  // Calls emit_rex_32(Register) for all non-byte registers.
  inline void emit_optional_rex_8(Register reg);

  // Calls emit_rex_32(Register, Operand) for all non-byte registers, and
  // emit_optional_rex_32(Register, Operand) for byte registers.
  inline void emit_optional_rex_8(Register reg, Operand op);

  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK_EQ(size, kInt32Size);
    }
  }

  template <class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template <class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }

  // Emit vex prefix
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, Operand rm, LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
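  // VEX layout used by the helpers above: the two-byte form is
  //   0xC5 [~R | ~vvvv | L | pp]
  // and the three-byte form is
  //   0xC4 [~R | ~X | ~B | mmmmm] [W | ~vvvv | L | pp],
  // where vvvv is the inverted code of the extra source register.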

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, Operand adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, Operand adr);
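  // ModR/M layout: [mod:2 | reg:3 | rm:3]. mod == 0b11 addresses a register
  // directly, which is why emit_modrm() below ORs in 0xC0.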

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(Register reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);

  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP. The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
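  // For example, ADD is opcode 0x03 (reg <- reg op r/m) or 0x01
  // (r/m <- r/m op reg), while the immediate forms share one opcode and
  // select the operation via the ModR/M reg-field subcode (/0 = ADD,
  // /5 = SUB, /7 = CMP, ...), matching the emit_* helpers further down.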
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, Operand rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Operand rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode, Register reg, Operand rm_reg, int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_8(byte subcode, Operand dst, Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_16(byte subcode, Operand dst, Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode, Operand dst, Immediate src,
                               int size);

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl % 64 bits (cl % 32 for 32-bit operand sizes).
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetics
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, Operand src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Operand dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, Operand src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Operand dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, Operand src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Operand dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with dst. If equal, set ZF and write src into
  // dst. Otherwise clear ZF and write dst into {al,ax,eax,rax}. This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(Operand dst, Register src, int size);

  void emit_dec(Register dst, int size);
  void emit_dec(Operand dst, int size);

  // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(Operand src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, Operand src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, Operand src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(Operand dst, int size);

  void emit_lea(Register dst, Operand src, int size);

  void emit_mov(Register dst, Operand src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(Operand dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(Operand dst, Immediate value, int size);
  void emit_mov(Register dst, Immediate64 value, int size);

  void emit_movzxb(Register dst, Operand src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, Operand src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(Operand dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(Operand dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, Operand src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Operand dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, Operand src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Operand dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(Operand op, Register reg, int size);
  void emit_test(Operand op, Immediate mask, int size);
  void emit_test(Register reg, Operand op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, Operand src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
      // there is no need to make this a 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }
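  // In the self-xor case above, "xorq rax, rax" is thus emitted as the
  // two-byte 33 C0 (xorl eax, eax) rather than the three-byte 48 33 C0.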

  void emit_xor(Register dst, Operand src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions are similar.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, Operand rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, Operand rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);

  // Record the position of a jmp/jcc instruction.
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Variables for this instance of the assembler.
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;

  ConstPool constpool_;

  friend class ConstPool;

#if defined(V8_OS_WIN_X64)
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif
};

extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
    void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                           YMMRegister src2, SIMDPrefix pp,
                           LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
    void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
                           XMMRegister src2, SIMDPrefix pp,
                           LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
    void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                           Operand src2, SIMDPrefix pp, LeadingOpcode m,
                           VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
    void Assembler::vinstr(byte op, YMMRegister dst, YMMRegister src1,
                           XMMRegister src2, SIMDPrefix pp,
                           LeadingOpcode m, VexW w, CpuFeature feature);
extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
    void Assembler::vinstr(byte op, YMMRegister dst, XMMRegister src1,
                           Operand src2, SIMDPrefix pp, LeadingOpcode m,
                           VexW w, CpuFeature feature);

// Helper class that ensures that there is enough space for generating
// instructions and relocation information. The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (V8_UNLIKELY(assembler_->buffer_overflow())) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* const assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
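
// Typical use in the assembler's emitter bodies (a sketch; the emitters live
// in assembler-x64.cc):
//   void Assembler::nop() {
//     EnsureSpace ensure_space(this);  // grow the buffer first if needed
//     emit(0x90);
//   }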

} // namespace internal
} // namespace v8

#endif // V8_CODEGEN_X64_ASSEMBLER_X64_H_
