// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

// A lightweight X64 Assembler.

#ifndef V8_CODEGEN_X64_ASSEMBLER_X64_H_
#define V8_CODEGEN_X64_ASSEMBLER_X64_H_

#include <deque>
#include <map>
#include <memory>
#include <vector>

#include "src/codegen/assembler.h"
#include "src/codegen/label.h"
#include "src/codegen/x64/constants-x64.h"
#include "src/codegen/x64/fma-instr.h"
#include "src/codegen/x64/register-x64.h"
#include "src/codegen/x64/sse-instr.h"
#include "src/objects/smi.h"
#if defined(V8_OS_WIN_X64)
#include "src/diagnostics/unwinding-info-win64.h"
#endif

namespace v8 {
namespace internal {

class SafepointTableBuilder;

// Utility functions

enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always = 16,
  never = 17,
  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,
  last_condition = greater
};

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}
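
// For example, since each condition above pairs with its negation at
// adjacent codes, flipping the low bit maps one onto the other:
//
//   NegateCondition(equal)  // 4 ^ 1 == 5 == not_equal
//   NegateCondition(below)  // 2 ^ 1 == 3 == above_equal (i.e. not_carry)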

enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
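// These values match the rounding-control bits of the SSE4.1 round*
// instruction immediates; the round/vround helpers below OR in 0x8 to
// mask precision exceptions.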

// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate {
 public:
  explicit constexpr Immediate(int32_t value) : value_(value) {}
  explicit constexpr Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit Immediate(Smi value)
      : value_(static_cast<int32_t>(static_cast<intptr_t>(value.ptr()))) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
  }

 private:
  const int32_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NONE;

  friend class Assembler;
};
ASSERT_TRIVIALLY_COPYABLE(Immediate);
static_assert(sizeof(Immediate) <= kSystemPointerSize,
              "Immediate must be small enough to pass it by value");

class Immediate64 {
 public:
  explicit constexpr Immediate64(int64_t value) : value_(value) {}
  explicit constexpr Immediate64(int64_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  explicit constexpr Immediate64(Address value, RelocInfo::Mode rmode)
      : value_(static_cast<int64_t>(value)), rmode_(rmode) {}

 private:
  const int64_t value_;
  const RelocInfo::Mode rmode_ = RelocInfo::NONE;

  friend class Assembler;
};

// -----------------------------------------------------------------------------
// Machine instruction Operands

enum ScaleFactor : int8_t {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,

  times_half_system_pointer_size = times_4,
  times_system_pointer_size = times_8,
  times_tagged_size = (kTaggedSize == 8) ? times_8 : times_4,
};

class V8_EXPORT_PRIVATE Operand {
 public:
  struct Data {
    byte rex = 0;
    byte buf[9];
    byte len = 1;   // number of bytes of buf in use.
    int8_t addend;  // for rip + offset + addend.
  };

  // [base + disp/r]
  V8_INLINE Operand(Register base, int32_t disp) {
    if (base == rsp || base == r12) {
      // SIB byte is needed to encode (rsp + offset) or (r12 + offset).
      set_sib(times_1, rsp, base);
    }

    if (disp == 0 && base != rbp && base != r13) {
      set_modrm(0, base);
    } else if (is_int8(disp)) {
      set_modrm(1, base);
      set_disp8(disp);
    } else {
      set_modrm(2, base);
      set_disp32(disp);
    }
  }

  // [base + index*scale + disp/r]
  V8_INLINE Operand(Register base, Register index, ScaleFactor scale,
                    int32_t disp) {
    DCHECK(index != rsp);
    set_sib(scale, index, base);
    if (disp == 0 && base != rbp && base != r13) {
      // This call to set_modrm doesn't overwrite the REX.B (or REX.X) bits
      // possibly set by set_sib.
      set_modrm(0, rsp);
    } else if (is_int8(disp)) {
      set_modrm(1, rsp);
      set_disp8(disp);
    } else {
      set_modrm(2, rsp);
      set_disp32(disp);
    }
  }

  // [index*scale + disp/r]
  V8_INLINE Operand(Register index, ScaleFactor scale, int32_t disp) {
    DCHECK(index != rsp);
    set_modrm(0, rsp);
    set_sib(scale, index, rbp);
    set_disp32(disp);
  }

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(Operand base, int32_t offset);

  // [rip + disp/r]
  V8_INLINE explicit Operand(Label* label, int addend = 0) {
    data_.addend = addend;
    DCHECK_NOT_NULL(label);
    DCHECK(addend == 0 || (is_int8(addend) && label->is_bound()));
    set_modrm(0, rbp);
    set_disp64(reinterpret_cast<intptr_t>(label));
  }

  Operand(const Operand&) V8_NOEXCEPT = default;

  const Data& data() const { return data_; }

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

 private:
  V8_INLINE void set_modrm(int mod, Register rm_reg) {
    DCHECK(is_uint2(mod));
    data_.buf[0] = mod << 6 | rm_reg.low_bits();
    // Set REX.B to the high bit of rm.code().
    data_.rex |= rm_reg.high_bit();
  }

  V8_INLINE void set_sib(ScaleFactor scale, Register index, Register base) {
    DCHECK_EQ(data_.len, 1);
    DCHECK(is_uint2(scale));
    // Use SIB with no index register only for base rsp or r12. Otherwise we
    // would skip the SIB byte entirely.
    DCHECK(index != rsp || base == rsp || base == r12);
    data_.buf[1] = (scale << 6) | (index.low_bits() << 3) | base.low_bits();
    data_.rex |= index.high_bit() << 1 | base.high_bit();
    data_.len = 2;
  }

  V8_INLINE void set_disp8(int disp) {
    DCHECK(is_int8(disp));
    DCHECK(data_.len == 1 || data_.len == 2);
    int8_t* p = reinterpret_cast<int8_t*>(&data_.buf[data_.len]);
    *p = disp;
    data_.len += sizeof(int8_t);
  }

  V8_INLINE void set_disp32(int disp) {
    DCHECK(data_.len == 1 || data_.len == 2);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(int32_t);
  }

  V8_INLINE void set_disp64(int64_t disp) {
    DCHECK_EQ(1, data_.len);
    Address p = reinterpret_cast<Address>(&data_.buf[data_.len]);
    WriteUnalignedValue(p, disp);
    data_.len += sizeof(disp);
  }

  Data data_;
};
ASSERT_TRIVIALLY_COPYABLE(Operand);
static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
              "Operand must be small enough to pass it by value");

#define ASSEMBLER_INSTRUCTION_LIST(V) \
  V(add)                              \
  V(and)                              \
  V(cmp)                              \
  V(cmpxchg)                          \
  V(dec)                              \
  V(idiv)                             \
  V(div)                              \
  V(imul)                             \
  V(inc)                              \
  V(lea)                              \
  V(mov)                              \
  V(movzxb)                           \
  V(movzxw)                           \
  V(not)                              \
  V(or)                               \
  V(repmovs)                          \
  V(sbb)                              \
  V(sub)                              \
  V(test)                             \
  V(xchg)                             \
  V(xor)

// Shift instructions on operands/registers with kInt32Size and kInt64Size.
#define SHIFT_INSTRUCTION_LIST(V) \
  V(rol, 0x0)                     \
  V(ror, 0x1)                     \
  V(rcl, 0x2)                     \
  V(rcr, 0x3)                     \
  V(shl, 0x4)                     \
  V(shr, 0x5)                     \
  V(sar, 0x7)

// Partial Constant Pool
// Unlike a complete constant pool (as used on ARM), a partial constant pool
// only takes effect for shareable constants, in order to reduce code size.
// A partial constant pool does not emit constant pool entries at the end of
// each code object. Instead, it keeps the first shareable constant inlined
// in the instructions and uses rip-relative memory loads for the same
// constants in subsequent instructions. These rip-relative loads target the
// position of the first inlined constant. For example:
//
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//
// turns into
//
//  REX.W movq r10,0x7f9f75a32c20   ; 10 bytes
//  …
//  REX.W movq r10,[rip+0xffffff96] ; 7 bytes
//  …
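//
// (0xffffff96 is the 32-bit two's-complement encoding of -106: the
// rip-relative load reaches backwards to the inlined imm64 of the first
// movq.)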

class ConstPool {
 public:
  explicit ConstPool(Assembler* assm) : assm_(assm) {}
  // Returns true when the partial constant pool is valid for this entry.
  bool TryRecordEntry(intptr_t data, RelocInfo::Mode mode);
  bool IsEmpty() const { return entries_.empty(); }

  void PatchEntries();
  // Discard any pending pool entries.
  void Clear();

 private:
  // Adds a shared entry to entries_. Returns true if this is not the first
  // time we add this entry, false otherwise.
  bool AddSharedEntry(uint64_t data, int offset);

  // Check if the instruction is a rip-relative move.
  bool IsMoveRipRelative(Address instr);

  Assembler* assm_;

  // Values, pc offsets of entries.
  using EntryMap = std::multimap<uint64_t, int>;
  EntryMap entries_;

  // Number of bytes taken up by the displacement of rip-relative addressing.
  static constexpr int kRipRelativeDispSize = 4;  // 32-bit displacement.
  // Distance between the address of the displacement in the rip-relative move
  // instruction and the head address of the instruction.
  static constexpr int kMoveRipRelativeDispOffset =
      3;  // REX Opcode ModRM Displacement
  // Distance between the address of the imm64 in the 'movq reg, imm64'
  // instruction and the head address of the instruction.
  static constexpr int kMoveImm64Offset = 2;  // REX Opcode imm64
  // A mask for rip-relative move instruction.
  static constexpr uint32_t kMoveRipRelativeMask = 0x00C7FFFB;
  // The bits for a rip-relative move instruction after mask.
  static constexpr uint32_t kMoveRipRelativeInstr = 0x00058B48;
};

class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 private:
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
  // the generated instructions. This leaves enough space for the
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
  static constexpr int kGap = 32;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is nullptr, the assembler allocates and grows its
  // own buffer. Otherwise it takes ownership of the provided buffer.
  explicit Assembler(const AssemblerOptions&,
                     std::unique_ptr<AssemblerBuffer> = {});
  ~Assembler() override = default;

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc.
  static constexpr int kNoHandlerTable = 0;
  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
  void GetCode(Isolate* isolate, CodeDesc* desc,
               SafepointTableBuilder* safepoint_table_builder,
               int handler_table_offset);

  // Convenience wrapper for code without safepoint or handler tables.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }
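
  // A minimal usage sketch, assuming an Isolate* and AssemblerOptions are
  // available from the embedding context:
  //
  //   Assembler masm(options);
  //   masm.movq(rax, int64_t{42});
  //   masm.ret(0);
  //   CodeDesc desc;
  //   masm.GetCode(isolate, &desc);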

  void FinalizeJumpOptimizationInfo();

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
  // The isolate argument is unused (and may be nullptr) when skipping flushing.
  static inline Address target_address_at(Address pc, Address constant_pool);
  static inline void set_target_address_at(
      Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

  // This sets the branch destination (which is in the instruction on x64).
  // This is for calls and branches within generated code.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code code, Address target);

  // Get the size of the special target encoded at 'instruction_payload'.
  inline static int deserialization_special_target_size(
      Address instruction_payload);

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  inline Handle<Code> code_target_object_handle_at(Address pc);
  inline Handle<HeapObject> compressed_embedded_object_handle_at(Address pc);
  inline Address runtime_entry_at(Address pc);

  // Number of bytes taken up by the branch target in the code.
  static constexpr int kSpecialTargetSize = 4;  // 32-bit displacement.

  // One byte opcode for test eax,0xXXXXXXXX.
  static constexpr byte kTestEaxByte = 0xA9;
  // One byte opcode for test al, 0xXX.
  static constexpr byte kTestAlByte = 0xA8;
  // One byte opcode for nop.
  static constexpr byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static constexpr byte kJccShortPrefix = 0x70;
  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
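  // The condition code is OR-ed directly into the low nibble of the one-byte
  // Jcc opcode, e.g. kJcShortOpcode == 0x70 | 2 == 0x72 and
  // kJnzShortOpcode == 0x70 | 5 == 0x75.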

  // VEX prefix encodings.
  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };

  // ---------------------------------------------------------------------------
  // Code generation
  //
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  // - Instructions on operands/registers with pointer size use 'p'.
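  //
  // For example (illustrative):
  //
  //   movb(rax, Immediate(1));  // 8-bit:  mov al, 1
  //   movl(rax, rbx);           // 32-bit: mov eax, ebx
  //   movq(rax, rbx);           // 64-bit: mov rax, rbx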

#define DECLARE_INSTRUCTION(instruction)        \
  template <class P1>                           \
  void instruction##_tagged(P1 p1) {            \
    emit_##instruction(p1, kTaggedSize);        \
  }                                             \
                                                \
  template <class P1>                           \
  void instruction##l(P1 p1) {                  \
    emit_##instruction(p1, kInt32Size);         \
  }                                             \
                                                \
  template <class P1>                           \
  void instruction##q(P1 p1) {                  \
    emit_##instruction(p1, kInt64Size);         \
  }                                             \
                                                \
  template <class P1, class P2>                 \
  void instruction##_tagged(P1 p1, P2 p2) {     \
    emit_##instruction(p1, p2, kTaggedSize);    \
  }                                             \
                                                \
  template <class P1, class P2>                 \
  void instruction##l(P1 p1, P2 p2) {           \
    emit_##instruction(p1, p2, kInt32Size);     \
  }                                             \
                                                \
  template <class P1, class P2>                 \
  void instruction##q(P1 p1, P2 p2) {           \
    emit_##instruction(p1, p2, kInt64Size);     \
  }                                             \
                                                \
  template <class P1, class P2, class P3>       \
  void instruction##l(P1 p1, P2 p2, P3 p3) {    \
    emit_##instruction(p1, p2, p3, kInt32Size); \
  }                                             \
                                                \
  template <class P1, class P2, class P3>       \
  void instruction##q(P1 p1, P2 p2, P3 p3) {    \
    emit_##instruction(p1, p2, p3, kInt64Size); \
  }
  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
#undef DECLARE_INSTRUCTION

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m, where m must be a power of 2.
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  void Nop(int bytes = 1);
  // Aligns code to something that's optimal for a jump target for the
  // platform.
  void CodeTargetAlign();

  // Stack
  void pushfq();
  void popfq();

  void pushq(Immediate value);
  // Push a 32 bit integer, and guarantee that it is actually pushed as a
  // 32 bit value; the normal push will optimize the 8 bit case.
  void pushq_imm32(int32_t imm32);
  void pushq(Register src);
  void pushq(Operand src);

  void popq(Register dst);
  void popq(Operand dst);

  void leave();

  // Moves
  void movb(Register dst, Operand src);
  void movb(Register dst, Immediate imm);
  void movb(Operand dst, Register src);
  void movb(Operand dst, Immediate imm);

  // Move the low 16 bits of a 64-bit register value to a 16-bit
  // memory location.
  void movw(Register dst, Operand src);
  void movw(Operand dst, Register src);
  void movw(Operand dst, Immediate imm);

  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(Operand dst, Label* src);

  // Load a heap number into a register.
  // The heap number will not be allocated and embedded into the code right
  // away. Instead, we emit the load of a dummy object. Later, when calling
  // Assembler::GetCode, the heap number will be allocated and the code will
  // be patched by replacing the dummy with the actual object. The RelocInfo
  // for the embedded object is already recorded correctly when emitting the
  // dummy move.
  void movq_heap_number(Register dst, double value);

  void movq_string(Register dst, const StringConstantBase* str);

  // Loads a 64-bit immediate into a register, potentially using the constant
  // pool.
  void movq(Register dst, int64_t value) { movq(dst, Immediate64(value)); }
  void movq(Register dst, uint64_t value) {
    movq(dst, Immediate64(static_cast<int64_t>(value)));
  }

  // Loads a 64-bit immediate into a register without using the constant pool.
  void movq_imm64(Register dst, int64_t value);

  void movsxbl(Register dst, Register src);
  void movsxbl(Register dst, Operand src);
  void movsxbq(Register dst, Register src);
  void movsxbq(Register dst, Operand src);
  void movsxwl(Register dst, Register src);
  void movsxwl(Register dst, Operand src);
  void movsxwq(Register dst, Register src);
  void movsxwq(Register dst, Operand src);
  void movsxlq(Register dst, Register src);
  void movsxlq(Register dst, Operand src);

  // Repeated moves.
  void repmovsb();
  void repmovsw();
  void repmovsl() { emit_repmovs(kInt32Size); }
  void repmovsq() { emit_repmovs(kInt64Size); }

  // Repeated store of doublewords (fill (E)CX doublewords at ES:[(E)DI] with
  // EAX).
  void repstosl();
  // Repeated store of quadwords (fill RCX quadwords at [RDI] with RAX).
  void repstosq();

  // Instruction to load from an immediate 64-bit pointer into RAX.
  void load_rax(Address value, RelocInfo::Mode rmode);
  void load_rax(ExternalReference ext);

  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Operand src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Operand src);

  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Register dst, Operand src) { arithmetic_op_8(0x3A, dst, src); }

  void cmpb(Operand dst, Register src) { arithmetic_op_8(0x38, src, dst); }

  void cmpb(Operand dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

  void cmpw(Operand dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  void cmpw(Register dst, Operand src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Register dst, Register src) { arithmetic_op_16(0x3B, dst, src); }

  void cmpw(Operand dst, Register src) { arithmetic_op_16(0x39, src, dst); }

  void testb(Register reg, Operand op) { testb(op, reg); }

  void testw(Register reg, Operand op) { testw(op, reg); }

  void andb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x4, dst, src);
  }

  void decb(Register dst);
  void decb(Operand dst);

  // Lock prefix.
  void lock();

  void xchgb(Register reg, Operand op);
  void xchgw(Register reg, Operand op);

  void xaddb(Operand dst, Register src);
  void xaddw(Operand dst, Register src);
  void xaddl(Operand dst, Register src);
  void xaddq(Operand dst, Register src);

  void negb(Register reg);
  void negw(Register reg);
  void negl(Register reg);
  void negq(Register reg);
  void negb(Operand op);
  void negw(Operand op);
  void negl(Operand op);
  void negq(Operand op);

  void cmpxchgb(Operand dst, Register src);
  void cmpxchgw(Operand dst, Register src);

  // Sign-extends rax into rdx:rax.
  void cqo();
  // Sign-extends eax into edx:eax.
  void cdq();

  // Multiply eax by src, put the result in edx:eax.
  void mull(Register src);
  void mull(Operand src);
  // Multiply rax by src, put the result in rdx:rax.
  void mulq(Register src);

#define DECLARE_SHIFT_INSTRUCTION(instruction, subcode)                     \
  void instruction##l(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Register dst, Immediate imm8) {                       \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt32Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##q(Operand dst, Immediate imm8) {                        \
    shift(dst, imm8, subcode, kInt64Size);                                  \
  }                                                                         \
                                                                            \
  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
                                                                            \
  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
                                                                            \
  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); }  \
                                                                            \
  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
#undef DECLARE_SHIFT_INSTRUCTION
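
  // For example, the list above expands to shll/shlq, sarl/sarq, etc.
  // (illustrative):
  //
  //   shll(rax, Immediate(4));  // eax <<= 4
  //   sarq_cl(rdx);             // rdx >>= cl (arithmetic)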

  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);

  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);

  void store_rax(Address dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

  void subb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x5, dst, src);
  }

  void sub_sp_32(uint32_t imm);

  void testb(Register dst, Register src);
  void testb(Register reg, Immediate mask);
  void testb(Operand op, Immediate mask);
  void testb(Operand op, Register reg);

  void testw(Register dst, Register src);
  void testw(Register reg, Immediate mask);
  void testw(Operand op, Immediate mask);
  void testw(Operand op, Register reg);

  // Bit operations.
  void bswapl(Register dst);
  void bswapq(Register dst);
  void btq(Operand dst, Register src);
  void btsq(Operand dst, Register src);
  void btsq(Register dst, Immediate imm8);
  void btrq(Register dst, Immediate imm8);
  void bsrq(Register dst, Register src);
  void bsrq(Register dst, Operand src);
  void bsrl(Register dst, Register src);
  void bsrl(Register dst, Operand src);
  void bsfq(Register dst, Register src);
  void bsfq(Register dst, Operand src);
  void bsfl(Register dst, Register src);
  void bsfl(Register dst, Operand src);

  // Miscellaneous
  void clc();
  void cld();
  void cpuid();
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
  void ud2();
  void setcc(Condition cc, Register reg);

  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask);
  void palignr(XMMRegister dst, Operand src, uint8_t mask);
  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask);

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);

  // Explicitly emit a near call / near jump. The displacement is relative to
  // the next instruction (which starts at {pc_offset() + kNearJmpInstrSize}).
  static constexpr int kNearJmpInstrSize = 5;
  void near_call(intptr_t disp, RelocInfo::Mode rmode);
  void near_jmp(intptr_t disp, RelocInfo::Mode rmode);

  void call(Handle<Code> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(Operand src);

  // Unconditional jump relative to the current address. Low-level routine,
  // use with caution!
  void jmp_rel(int offset);

  // Conditional jumps
  void j(Condition cc, Label* L, Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);

  // Floating-point operations
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(Operand adr);
  void fld_d(Operand adr);

  void fstp_s(Operand adr);
  void fstp_d(Operand adr);
  void fstp(int index);

  void fild_s(Operand adr);
  void fild_d(Operand adr);

  void fist_s(Operand adr);

  void fistp_s(Operand adr);
  void fistp_d(Operand adr);

  void fisttp_s(Operand adr);
  void fisttp_d(Operand adr);

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(Operand adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();

  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, Operand src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, Operand src);
  void movss(Operand dst, XMMRegister src);

  void movlps(XMMRegister dst, Operand src);
  void movlps(Operand dst, XMMRegister src);

  void movhps(XMMRegister dst, Operand src);
  void movhps(Operand dst, XMMRegister src);

  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  void cvttss2si(Register dst, Operand src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, Operand src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  void movmskps(Register dst, XMMRegister src);

  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);

  // SSE instructions
  void sse_instr(XMMRegister dst, XMMRegister src, byte escape, byte opcode);
  void sse_instr(XMMRegister dst, Operand src, byte escape, byte opcode);
#define DECLARE_SSE_INSTRUCTION(instruction, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {       \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }                                                          \
  void instruction(XMMRegister dst, Operand src) {           \
    sse_instr(dst, src, 0x##escape, 0x##opcode);             \
  }

  SSE_UNOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
  SSE_BINOP_INSTRUCTION_LIST(DECLARE_SSE_INSTRUCTION)
#undef DECLARE_SSE_INSTRUCTION

  // SSE instructions with prefix and SSE2 instructions
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
                  byte opcode);
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, Operand src) {                    \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  // These SSE instructions have the same encoding as the SSE2 instructions.
  SSE_INSTRUCTION_LIST_SS(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
  SSE2_INSTRUCTION_LIST_SD(DECLARE_SSE2_INSTRUCTION)
  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

  void sse2_instr(XMMRegister reg, byte imm8, byte prefix, byte escape,
                  byte opcode, int extension) {
    XMMRegister ext_reg = XMMRegister::from_code(extension);
    sse2_instr(ext_reg, reg, prefix, escape, opcode);
    emit(imm8);
  }

#define DECLARE_SSE2_SHIFT_IMM(instruction, prefix, escape, opcode, extension) \
  void instruction(XMMRegister reg, byte imm8) {                               \
    sse2_instr(reg, imm8, 0x##prefix, 0x##escape, 0x##opcode, 0x##extension);  \
  }
  SSE2_INSTRUCTION_LIST_SHIFT_IMM(DECLARE_SSE2_SHIFT_IMM)
#undef DECLARE_SSE2_SHIFT_IMM

#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

#define DECLARE_SSE2_UNOP_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
  void v##instruction(XMMRegister dst, XMMRegister src) {                      \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }                                                                            \
  void v##instruction(XMMRegister dst, Operand src) {                          \
    vpd(0x##opcode, dst, xmm0, src);                                           \
  }

  SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSE2_UNOP_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, Operand src);
  void movddup(XMMRegister dst, XMMRegister src);

  // SSSE3
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                   byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, Operand src) {                           \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  void sse4_instr(Register dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(Operand dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, Register src, byte prefix, byte escape1,
                  byte escape2, byte opcode, int8_t imm8);
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, Operand src) {                          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
  DECLARE_SSE4_INSTRUCTION(pblendvb, 66, 0F, 38, 10)
  DECLARE_SSE4_INSTRUCTION(blendvps, 66, 0F, 38, 14)
  DECLARE_SSE4_INSTRUCTION(blendvpd, 66, 0F, 38, 15)
#undef DECLARE_SSE4_INSTRUCTION

#define DECLARE_SSE4_EXTRACT_INSTRUCTION(instruction, prefix, escape1,     \
                                         escape2, opcode)                  \
  void instruction(Register dst, XMMRegister src, uint8_t imm8) {          \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }                                                                        \
  void instruction(Operand dst, XMMRegister src, uint8_t imm8) {           \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode, \
               imm8);                                                      \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_SSE4_EXTRACT_INSTRUCTION)
#undef DECLARE_SSE4_EXTRACT_INSTRUCTION

  // SSE4.2
  void sse4_2_instr(XMMRegister dst, XMMRegister src, byte prefix,
                    byte escape1, byte escape2, byte opcode);
  void sse4_2_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
                    byte escape2, byte opcode);
#define DECLARE_SSE4_2_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                   opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                        \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                           \
  void instruction(XMMRegister dst, Operand src) {                            \
    sse4_2_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE4_2_INSTRUCTION)
#undef DECLARE_SSE4_2_INSTRUCTION

#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) {      \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_2_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION

#define DECLARE_SSSE3_UNOP_AVX_INSTRUCTION(instruction, prefix, escape1,     \
                                           escape2, opcode)                  \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_UNOP_INSTRUCTION_LIST(DECLARE_SSSE3_UNOP_AVX_INSTRUCTION)
#undef DECLARE_SSSE3_UNOP_AVX_INSTRUCTION

  void vpblendvb(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4C, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4A, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

  void vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask) {
    vinstr(0x4B, dst, src1, src2, k66, k0F3A, kW0);
    // The mask operand is encoded in bits[7:4] of the immediate byte.
    emit(mask.code() << 4);
  }

#define DECLARE_SSE4_PMOV_AVX_INSTRUCTION(instruction, prefix, escape1,      \
                                          escape2, opcode)                   \
  void v##instruction(XMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(XMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
  }
  SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_PMOV_AVX_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX_INSTRUCTION

#define DECLARE_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,        \
                                opcode)                                       \
  void v##instruction(Register dst, XMMRegister src, uint8_t imm8) {          \
    XMMRegister idst = XMMRegister::from_code(dst.code());                    \
    vinstr(0x##opcode, src, xmm0, idst, k##prefix, k##escape1##escape2, kW0); \
    emit(imm8);                                                               \
  }                                                                           \
  void v##instruction(Operand dst, XMMRegister src, uint8_t imm8) {           \
    vinstr(0x##opcode, src, xmm0, dst, k##prefix, k##escape1##escape2, kW0);  \
    emit(imm8);                                                               \
  }

  SSE4_EXTRACT_INSTRUCTION_LIST(DECLARE_AVX_INSTRUCTION)
#undef DECLARE_AVX_INSTRUCTION

  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, Operand src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(XMMRegister dst, Operand src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(Operand dst, XMMRegister src);
  void movsd(XMMRegister dst, Operand src);

  void movdqa(Operand dst, XMMRegister src);
  void movdqa(XMMRegister dst, Operand src);

  void movdqu(Operand dst, XMMRegister src);
  void movdqu(XMMRegister dst, Operand src);
  void movdqu(XMMRegister dst, XMMRegister src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, Operand src);
  void movupd(Operand dst, XMMRegister src);

  void cvttsd2si(Register dst, Operand src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, Operand src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, Operand src);
  void cvttps2dq(XMMRegister dst, Operand src);
  void cvttps2dq(XMMRegister dst, XMMRegister src);

  void cvtlsi2sd(XMMRegister dst, Operand src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, Operand src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, Operand src);
  void cvtqsi2sd(XMMRegister dst, Register src);

  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, Operand src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  void haddps(XMMRegister dst, XMMRegister src);
  void haddps(XMMRegister dst, Operand src);

  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, Operand src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  void pmovmskb(Register dst, XMMRegister src);

  // SSE 4.1 instruction
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void insertps(XMMRegister dst, Operand src, byte imm8);
  void pextrq(Register dst, XMMRegister src, int8_t imm8);
  void pinsrb(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrb(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrw(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrw(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrd(XMMRegister dst, Operand src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Register src, uint8_t imm8);
  void pinsrq(XMMRegister dst, Operand src, uint8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, Operand src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, Operand src, int8_t cmp);

#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }     \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0)
  SSE_CMP_P(cmplt, 0x1)
  SSE_CMP_P(cmple, 0x2)
  SSE_CMP_P(cmpneq, 0x4)
  SSE_CMP_P(cmpnlt, 0x5)
  SSE_CMP_P(cmpnle, 0x6)

#undef SSE_CMP_P

  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, Operand src);
  void movups(Operand dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);

  void movlhps(XMMRegister dst, XMMRegister src) {
    sse_instr(dst, src, 0x0F, 0x16);
  }

  // AVX instruction
  void vmovddup(XMMRegister dst, XMMRegister src);
  void vmovddup(XMMRegister dst, Operand src);
  void vbroadcastss(XMMRegister dst, Operand src);

  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);
  void fma_instr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
                 VectorLength l, SIMDPrefix pp, LeadingOpcode m, VexW w);

#define FMA(instr, length, prefix, escape1, escape2, extension, opcode) \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {     \
    fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix,        \
              k##escape1##escape2, k##extension);                       \
  }                                                                     \
  void instr(XMMRegister dst, XMMRegister src1, Operand src2) {         \
    fma_instr(0x##opcode, dst, src1, src2, k##length, k##prefix,        \
              k##escape1##escape2, k##extension);                       \
  }
  FMA_INSTRUCTION_LIST(FMA)
#undef FMA
1314
1315 void vmovd(XMMRegister dst, Register src);
1316 void vmovd(XMMRegister dst, Operand src);
1317 void vmovd(Register dst, XMMRegister src);
1318 void vmovq(XMMRegister dst, Register src);
1319 void vmovq(XMMRegister dst, Operand src);
1320 void vmovq(Register dst, XMMRegister src);
1321
vmovsd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1322 void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1323 vsd(0x10, dst, src1, src2);
1324 }
vmovsd(XMMRegister dst,Operand src)1325 void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
vmovsd(Operand dst,XMMRegister src)1326 void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
1327 void vmovdqu(XMMRegister dst, Operand src);
1328 void vmovdqu(Operand dst, XMMRegister src);
1329
1330 void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
1331 void vmovlps(Operand dst, XMMRegister src);
1332
1333 void vmovhps(XMMRegister dst, XMMRegister src1, Operand src2);
1334 void vmovhps(Operand dst, XMMRegister src);
1335
1336 #define AVX_SSE_UNOP(instr, escape, opcode) \
1337 void v##instr(XMMRegister dst, XMMRegister src2) { \
1338 vps(0x##opcode, dst, xmm0, src2); \
1339 } \
1340 void v##instr(XMMRegister dst, Operand src2) { \
1341 vps(0x##opcode, dst, xmm0, src2); \
1342 }
1343 SSE_UNOP_INSTRUCTION_LIST(AVX_SSE_UNOP)
1344 #undef AVX_SSE_UNOP
1345
1346 #define AVX_SSE_BINOP(instr, escape, opcode) \
1347 void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1348 vps(0x##opcode, dst, src1, src2); \
1349 } \
1350 void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
1351 vps(0x##opcode, dst, src1, src2); \
1352 }
SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)1353 SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)
1354 #undef AVX_SSE_BINOP
1355
1356 #define AVX_3(instr, opcode, impl) \
1357 void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1358 impl(opcode, dst, src1, src2); \
1359 } \
1360 void instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
1361 impl(opcode, dst, src1, src2); \
1362 }
1363
1364 AVX_3(vhaddps, 0x7c, vsd)
1365
1366 #define AVX_SCALAR(instr, prefix, escape, opcode) \
1367 void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1368 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG); \
1369 } \
1370 void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
1371 vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kWIG); \
1372 }
1373 SSE_INSTRUCTION_LIST_SS(AVX_SCALAR)
1374 SSE2_INSTRUCTION_LIST_SD(AVX_SCALAR)
1375 #undef AVX_SCALAR
1376
1377 #undef AVX_3
1378
1379 #define AVX_SSE2_SHIFT_IMM(instr, prefix, escape, opcode, extension) \
1380 void v##instr(XMMRegister dst, XMMRegister src, byte imm8) { \
1381 XMMRegister ext_reg = XMMRegister::from_code(extension); \
1382 vinstr(0x##opcode, ext_reg, dst, src, k##prefix, k##escape, kWIG); \
1383 emit(imm8); \
1384 }
1385 SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
1386 #undef AVX_SSE2_SHIFT_IMM
1387
1388 void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1389 vinstr(0x16, dst, src1, src2, kNone, k0F, kWIG);
1390 }
vcvtss2sd(XMMRegister dst,XMMRegister src1,XMMRegister src2)1391 void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1392 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1393 }
vcvtss2sd(XMMRegister dst,XMMRegister src1,Operand src2)1394 void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1395 vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1396 }
  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
    vinstr(0x5b, dst, xmm0, src, kF3, k0F, kWIG);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, Operand src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
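  // The Register -> XMMRegister::from_code casts above are an encoding
  // convenience: only the register code ends up in the ModR/M byte, so the
  // XMM emission path can be reused for general-purpose destinations.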
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, Operand src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
    vinstr(0x09, dst, xmm0, src, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
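  // The trailing immediate is the SSE4.1 rounding-control byte: bits 1:0
  // select the rounding mode and bit 3 (0x8) suppresses the precision
  // (inexact) exception. E.g. vroundsd(dst, src1, src2, kRoundDown) emits
  // 0x09 as the immediate: round toward -infinity with the exception masked.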

  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  void vmovss(XMMRegister dst, Operand src) { vss(0x10, dst, xmm0, src); }
  void vmovss(Operand dst, XMMRegister src) { vss(0x11, src, xmm0, dst); }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, Operand src);
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
               byte imm8) {
    vps(0xC6, dst, src1, src2, imm8);
  }

  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
  void vmovupd(Operand dst, XMMRegister src) { vpd(0x11, src, xmm0, dst); }
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vpd(0x50, idst, xmm0, src);
  }
  void vpmovmskb(Register dst, XMMRegister src);
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }

#define AVX_CMP_P(instr, imm8)                                          \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmpps(dst, src1, src2, imm8);                                      \
  }                                                                     \
  void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmpps(dst, src1, src2, imm8);                                      \
  }                                                                     \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vcmppd(dst, src1, src2, imm8);                                      \
  }                                                                     \
  void instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) {     \
    vcmppd(dst, src1, src2, imm8);                                      \
  }

  AVX_CMP_P(vcmpeq, 0x0)
  AVX_CMP_P(vcmplt, 0x1)
  AVX_CMP_P(vcmple, 0x2)
  AVX_CMP_P(vcmpneq, 0x4)
  AVX_CMP_P(vcmpnlt, 0x5)
  AVX_CMP_P(vcmpnle, 0x6)

#undef AVX_CMP_P
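  // The imm8 baked into AVX_CMP_P is the packed-compare predicate:
  // 0x0 EQ, 0x1 LT, 0x2 LE, 0x4 NEQ, 0x5 NLT, 0x6 NLE (0x3, unordered, is
  // intentionally not wrapped). E.g. vcmpeqps(dst, src1, src2) emits
  // VCMPPS dst, src1, src2, 0.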

  void vlddqu(XMMRegister dst, Operand src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8) {
    vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpextrq(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Register src2,
               uint8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW1);
    emit(imm8);
  }
  void vpinsrq(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW1);
    emit(imm8);
  }

  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshufd(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshuflw(XMMRegister dst, Operand src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF2, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }
  void vpshufhw(XMMRegister dst, Operand src, uint8_t imm8) {
    // PSHUFHW always takes the F3 mandatory prefix, for the memory-operand
    // form as well as the register form above.
    vinstr(0x70, dst, xmm0, src, kF3, k0F, kWIG);
    emit(imm8);
  }

  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }
  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2,
                uint8_t mask) {
    vinstr(0x0E, dst, src1, src2, k66, k0F3A, kWIG);
    emit(mask);
  }

  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }
  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2,
                uint8_t imm8) {
    vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
    emit(imm8);
  }

  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
           byte imm8);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);

  // BMI instructions.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, Operand src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Operand src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, Operand src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Operand src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) { bmi1q(0xf3, rbx, dst, src); }
  void blsiq(Register dst, Operand src) { bmi1q(0xf3, rbx, dst, src); }
  void blsil(Register dst, Register src) { bmi1l(0xf3, rbx, dst, src); }
  void blsil(Register dst, Operand src) { bmi1l(0xf3, rbx, dst, src); }
  void blsmskq(Register dst, Register src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskq(Register dst, Operand src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Register src) { bmi1l(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Operand src) { bmi1l(0xf3, rdx, dst, src); }
  void blsrq(Register dst, Register src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrq(Register dst, Operand src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Register src) { bmi1l(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Operand src) { bmi1l(0xf3, rcx, dst, src); }
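  // BLSI, BLSMSK, and BLSR share opcode 0xf3 and are distinguished by an
  // opcode extension in the ModR/M reg field (/3, /2, and /1). The rbx, rdx,
  // and rcx arguments above exist only to supply those register codes
  // (3, 2, and 1); no general-purpose register is actually touched.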
  void tzcntq(Register dst, Register src);
  void tzcntq(Register dst, Operand src);
  void tzcntl(Register dst, Register src);
  void tzcntl(Register dst, Operand src);

  void lzcntq(Register dst, Register src);
  void lzcntq(Register dst, Operand src);
  void lzcntl(Register dst, Register src);
  void lzcntl(Register dst, Operand src);

  void popcntq(Register dst, Register src);
  void popcntq(Register dst, Operand src);
  void popcntl(Register dst, Register src);
  void popcntl(Register dst, Operand src);

  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, Operand src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Operand src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, Operand src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Operand src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, Operand src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Operand src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Operand src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Operand src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Operand src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Operand src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Operand src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Operand src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
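  // Note the swapped argument order in SARX/SHLX/SHRX (and BZHI and BEXTR
  // above): these instructions take the shift count or control operand in
  // VEX.vvvv, so src2 is passed as the vreg argument while src1 lands in the
  // ModR/M r/m field.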
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, Operand src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, Operand src, byte imm8);

  void mfence();
  void lfence();
  void pause();

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
                         int id);

  // Writes a single unit of raw data into the code stream: a byte (db), a
  // 32-bit value (dd), or a 64-bit value (dq). Used for inline tables,
  // e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  void dp(uintptr_t data) { dq(data); }
  void dq(Label* label);

  // Patch entries for partial constant pool.
  void PatchConstPool();

  // Check whether the partial constant pool applies to this rmode.
  static bool UseConstPoolFor(RelocInfo::Mode rmode);

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  byte byte_at(int pos) { return buffer_start_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_start_[pos] = value; }

#if defined(V8_OS_WIN_X64)
  win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
#endif

 protected:
  // Call near indirect
  void call(Operand operand);

 private:
  Address addr_at(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }
  uint32_t long_at(int pos) {
    return ReadUnalignedValue<uint32_t>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    WriteUnalignedValue(addr_at(pos), x);
  }

  // code emission
  void GrowBuffer();

  void emit(byte x) { *pc_++ = x; }
  inline void emitl(uint32_t x);
  inline void emitq(uint64_t x);
  inline void emitw(uint16_t x);
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
  inline void emit(Immediate x);
  inline void emit(Immediate64 x);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);
  inline void emit_rex_64(XMMRegister reg, XMMRegister rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is set.
  inline void emit_rex_64(Register reg, Operand op);
  inline void emit_rex_64(XMMRegister reg, Operand op);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(Operand op);

  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }
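  // The REX byte has the layout 0b0100WRXB, so the bare prefix above is
  // 0x48 (only REX.W set); e.g. emit_rex_64(rax, r8) would emit 0x49,
  // because r8's register code (8) sets REX.B.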

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is clear.
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared.
  inline void emit_rex_32(Register reg, Operand op);

  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
  inline void emit_rex_32(Operand op);

  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared. If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, Operand op);

  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Operand), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, Operand op);

  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);
  inline void emit_optional_rex_32(XMMRegister rm_reg);

  // Optionally do as emit_rex_32(Operand) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(Operand op);

  // Calls emit_rex_32(Register) for all non-byte registers.
  inline void emit_optional_rex_8(Register reg);

  // Calls emit_rex_32(Register, Operand) for all non-byte registers, and
  // emit_optional_rex_32(Register, Operand) for byte registers.
  inline void emit_optional_rex_8(Register reg, Operand op);

  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK_EQ(size, kInt32Size);
    }
  }

  template <class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1);
    }
  }

  template <class P1, class P2>
  void emit_rex(P1 p1, P2 p2, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1, p2);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1, p2);
    }
  }

  // Emit vex prefix
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
  inline void emit_vex3_byte1(XMMRegister reg, Operand rm, LeadingOpcode m);
  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Register rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
  inline void emit_vex_prefix(Register reg, Register v, Operand rm,
                              VectorLength l, SIMDPrefix pp, LeadingOpcode m,
                              VexW w);
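  // Illustrative two-byte VEX encoding: byte 0 is 0xc5 and byte 1 packs
  // ~R | ~vvvv | L | pp, so `vmovaps xmm1, xmm2` comes out as
  // C5 F8 28 CA (prefix, 0xF8 = no high regs/128-bit/no pp, opcode, ModR/M).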

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, Operand adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, Operand adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }
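  // For example, emit_modrm(rax, rcx) emits 0xC1: mod = 11 (register
  // direct), reg = 000 (rax), rm = 001 (rcx).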

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }

  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

  // The first argument is the reg field, the second argument is the r/m field.
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, Operand adr);
  void emit_sse_operand(Register reg, Operand adr);
  void emit_sse_operand(XMMRegister dst, Register src);
  void emit_sse_operand(Register dst, XMMRegister src);
  void emit_sse_operand(XMMRegister dst);

  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP. The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
  // ModR/M byte.
  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_8(byte opcode, Register reg, Operand rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, Operand rm_reg);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
  void arithmetic_op(byte opcode, Register reg, Operand rm_reg, int size);
  // Operate on a byte in memory or register.
  void immediate_arithmetic_op_8(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_8(byte subcode, Operand dst, Immediate src);
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op_16(byte subcode, Operand dst, Immediate src);
  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src,
                               int size);
  void immediate_arithmetic_op(byte subcode, Operand dst, Immediate src,
                               int size);
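  // The opcode and subcode arguments follow the x86 arithmetic-group
  // pattern: e.g. ADD is opcode 0x03 (reg <- r/m) or 0x01 (r/m <- reg) and
  // subcode /0 of the immediate group, while CMP uses 0x3B, 0x39, and /7;
  // the emit_* helpers further down pass exactly these values.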

  // Emit machine code for a shift operation.
  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
  void shift(Register dst, Immediate shift_amount, int subcode, int size);
  // Shift dst by cl modulo the operand width (cl % 32 or cl % 64 bits).
  void shift(Register dst, int subcode, int size);
  void shift(Operand dst, int subcode, int size);

  void emit_farith(int b1, int b2, int i);

  // labels
  // void print(Label* L);
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Arithmetics
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, Operand src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Operand dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, Operand src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Operand dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, Operand src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Operand dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  // Compare {al,ax,eax,rax} with dst. If equal, set ZF and write src into
  // dst. Otherwise clear ZF and write dst into {al,ax,eax,rax}. This
  // operation is only atomic if prefixed by the lock instruction.
  void emit_cmpxchg(Operand dst, Register src, int size);

  void emit_dec(Register dst, int size);
  void emit_dec(Operand dst, int size);

  // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
  // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
  // when size is 32.
  void emit_idiv(Register src, int size);
  void emit_div(Register src, int size);

  // Signed multiply instructions.
  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
  void emit_imul(Register src, int size);
  void emit_imul(Operand src, int size);
  void emit_imul(Register dst, Register src, int size);
  void emit_imul(Register dst, Operand src, int size);
  void emit_imul(Register dst, Register src, Immediate imm, int size);
  void emit_imul(Register dst, Operand src, Immediate imm, int size);

  void emit_inc(Register dst, int size);
  void emit_inc(Operand dst, int size);

  void emit_lea(Register dst, Operand src, int size);

  void emit_mov(Register dst, Operand src, int size);
  void emit_mov(Register dst, Register src, int size);
  void emit_mov(Operand dst, Register src, int size);
  void emit_mov(Register dst, Immediate value, int size);
  void emit_mov(Operand dst, Immediate value, int size);
  void emit_mov(Register dst, Immediate64 value, int size);

  void emit_movzxb(Register dst, Operand src, int size);
  void emit_movzxb(Register dst, Register src, int size);
  void emit_movzxw(Register dst, Operand src, int size);
  void emit_movzxw(Register dst, Register src, int size);

  void emit_neg(Register dst, int size);
  void emit_neg(Operand dst, int size);

  void emit_not(Register dst, int size);
  void emit_not(Operand dst, int size);

  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, Operand src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Operand dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, Operand src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Operand dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(Operand op, Register reg, int size);
  void emit_test(Operand op, Immediate mask, int size);
  void emit_test(Register reg, Operand op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, Operand src, int size);

  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
      // there is no need to make this a 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }
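  // The narrowing above saves the REX.W prefix byte: zeroing rax is emitted
  // as the two-byte `xor eax, eax` instead of the three-byte 64-bit form.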

  void emit_xor(Register dst, Operand src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(Operand dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }

  // Most BMI instructions are similar.
  void bmi1q(byte op, Register reg, Register vreg, Register rm);
  void bmi1q(byte op, Register reg, Register vreg, Operand rm);
  void bmi1l(byte op, Register reg, Register vreg, Register rm);
  void bmi1l(byte op, Register reg, Register vreg, Operand rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);

  // record the position of jmp/jcc instruction
  void record_farjmp_position(Label* L, int pos);

  bool is_optimizable_farjmp(int idx);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Variables for this instance of assembler
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;

  ConstPool constpool_;

  friend class ConstPool;

#if defined(V8_OS_WIN_X64)
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif
};

// Helper class that ensures that there is enough space for generating
// instructions and relocation information. The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace {
 public:
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;
#ifdef DEBUG
  int space_before_;
#endif
};
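
// A typical emitter pairs EnsureSpace with raw byte emission; as a sketch:
//   void Assembler::pause() {
//     EnsureSpace ensure_space(this);  // grow buffer up front if needed
//     emit(0xF3);                      // F3 90 encodes PAUSE
//     emit(0x90);
//   }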

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_X64_ASSEMBLER_X64_H_