• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <limits>
6 
7 #include "src/base/overflowing-math.h"
8 #include "src/codegen/assembler.h"
9 #include "src/codegen/cpu-features.h"
10 #include "src/codegen/external-reference.h"
11 #include "src/codegen/macro-assembler.h"
12 #include "src/codegen/optimized-compilation-info.h"
13 #include "src/codegen/x64/assembler-x64.h"
14 #include "src/codegen/x64/register-x64.h"
15 #include "src/common/globals.h"
16 #include "src/compiler/backend/code-generator-impl.h"
17 #include "src/compiler/backend/code-generator.h"
18 #include "src/compiler/backend/gap-resolver.h"
19 #include "src/compiler/backend/instruction-codes.h"
20 #include "src/compiler/node-matchers.h"
21 #include "src/compiler/osr.h"
22 #include "src/heap/memory-chunk.h"
23 #include "src/objects/code-kind.h"
24 #include "src/objects/smi.h"
25 
26 #if V8_ENABLE_WEBASSEMBLY
27 #include "src/wasm/wasm-code-manager.h"
28 #include "src/wasm/wasm-objects.h"
29 #endif  // V8_ENABLE_WEBASSEMBLY
30 
31 namespace v8 {
32 namespace internal {
33 namespace compiler {
34 
35 #define __ tasm()->
36 
37 // Adds X64 specific methods for decoding operands.
38 class X64OperandConverter : public InstructionOperandConverter {
39  public:
X64OperandConverter(CodeGenerator * gen,Instruction * instr)40   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
41       : InstructionOperandConverter(gen, instr) {}
42 
InputImmediate(size_t index)43   Immediate InputImmediate(size_t index) {
44     return ToImmediate(instr_->InputAt(index));
45   }
46 
InputOperand(size_t index,int extra=0)47   Operand InputOperand(size_t index, int extra = 0) {
48     return ToOperand(instr_->InputAt(index), extra);
49   }
50 
OutputOperand()51   Operand OutputOperand() { return ToOperand(instr_->Output()); }
52 
ToImmediate(InstructionOperand * operand)53   Immediate ToImmediate(InstructionOperand* operand) {
54     Constant constant = ToConstant(operand);
55     if (constant.type() == Constant::kFloat64) {
56       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
57       return Immediate(0);
58     }
59     if (RelocInfo::IsWasmReference(constant.rmode())) {
60       return Immediate(constant.ToInt32(), constant.rmode());
61     }
62     return Immediate(constant.ToInt32());
63   }
64 
ToOperand(InstructionOperand * op,int extra=0)65   Operand ToOperand(InstructionOperand* op, int extra = 0) {
66     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
67     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
68   }
69 
SlotToOperand(int slot_index,int extra=0)70   Operand SlotToOperand(int slot_index, int extra = 0) {
71     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
72     return Operand(offset.from_stack_pointer() ? rsp : rbp,
73                    offset.offset() + extra);
74   }
75 
NextOffset(size_t * offset)76   static size_t NextOffset(size_t* offset) {
77     size_t i = *offset;
78     (*offset)++;
79     return i;
80   }
81 
ScaleFor(AddressingMode one,AddressingMode mode)82   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
83     STATIC_ASSERT(0 == static_cast<int>(times_1));
84     STATIC_ASSERT(1 == static_cast<int>(times_2));
85     STATIC_ASSERT(2 == static_cast<int>(times_4));
86     STATIC_ASSERT(3 == static_cast<int>(times_8));
87     int scale = static_cast<int>(mode - one);
88     DCHECK(scale >= 0 && scale < 4);
89     return static_cast<ScaleFactor>(scale);
90   }
91 
MemoryOperand(size_t * offset)92   Operand MemoryOperand(size_t* offset) {
93     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
94     switch (mode) {
95       case kMode_MR: {
96         Register base = InputRegister(NextOffset(offset));
97         int32_t disp = 0;
98         return Operand(base, disp);
99       }
100       case kMode_MRI: {
101         Register base = InputRegister(NextOffset(offset));
102         int32_t disp = InputInt32(NextOffset(offset));
103         return Operand(base, disp);
104       }
105       case kMode_MR1:
106       case kMode_MR2:
107       case kMode_MR4:
108       case kMode_MR8: {
109         Register base = InputRegister(NextOffset(offset));
110         Register index = InputRegister(NextOffset(offset));
111         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
112         int32_t disp = 0;
113         return Operand(base, index, scale, disp);
114       }
115       case kMode_MR1I:
116       case kMode_MR2I:
117       case kMode_MR4I:
118       case kMode_MR8I: {
119         Register base = InputRegister(NextOffset(offset));
120         Register index = InputRegister(NextOffset(offset));
121         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
122         int32_t disp = InputInt32(NextOffset(offset));
123         return Operand(base, index, scale, disp);
124       }
125       case kMode_M1: {
126         Register base = InputRegister(NextOffset(offset));
127         int32_t disp = 0;
128         return Operand(base, disp);
129       }
130       case kMode_M2:
131         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
132       case kMode_M4:
133       case kMode_M8: {
134         Register index = InputRegister(NextOffset(offset));
135         ScaleFactor scale = ScaleFor(kMode_M1, mode);
136         int32_t disp = 0;
137         return Operand(index, scale, disp);
138       }
139       case kMode_M1I:
140       case kMode_M2I:
141       case kMode_M4I:
142       case kMode_M8I: {
143         Register index = InputRegister(NextOffset(offset));
144         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
145         int32_t disp = InputInt32(NextOffset(offset));
146         return Operand(index, scale, disp);
147       }
148       case kMode_Root: {
149         Register base = kRootRegister;
150         int32_t disp = InputInt32(NextOffset(offset));
151         return Operand(base, disp);
152       }
153       case kMode_None:
154         UNREACHABLE();
155     }
156     UNREACHABLE();
157   }
158 
MemoryOperand(size_t first_input=0)159   Operand MemoryOperand(size_t first_input = 0) {
160     return MemoryOperand(&first_input);
161   }
162 };
163 
164 namespace {
165 
HasAddressingMode(Instruction * instr)166 bool HasAddressingMode(Instruction* instr) {
167   return instr->addressing_mode() != kMode_None;
168 }
169 
HasImmediateInput(Instruction * instr,size_t index)170 bool HasImmediateInput(Instruction* instr, size_t index) {
171   return instr->InputAt(index)->IsImmediate();
172 }
173 
HasRegisterInput(Instruction * instr,size_t index)174 bool HasRegisterInput(Instruction* instr, size_t index) {
175   return instr->InputAt(index)->IsRegister();
176 }
177 
178 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
179  public:
OutOfLineLoadFloat32NaN(CodeGenerator * gen,XMMRegister result)180   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
181       : OutOfLineCode(gen), result_(result) {}
182 
Generate()183   void Generate() final {
184     __ Xorps(result_, result_);
185     __ Divss(result_, result_);
186   }
187 
188  private:
189   XMMRegister const result_;
190 };
191 
192 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
193  public:
OutOfLineLoadFloat64NaN(CodeGenerator * gen,XMMRegister result)194   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
195       : OutOfLineCode(gen), result_(result) {}
196 
Generate()197   void Generate() final {
198     __ Xorpd(result_, result_);
199     __ Divsd(result_, result_);
200   }
201 
202  private:
203   XMMRegister const result_;
204 };
205 
206 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
207  public:
OutOfLineTruncateDoubleToI(CodeGenerator * gen,Register result,XMMRegister input,StubCallMode stub_mode,UnwindingInfoWriter * unwinding_info_writer)208   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
209                              XMMRegister input, StubCallMode stub_mode,
210                              UnwindingInfoWriter* unwinding_info_writer)
211       : OutOfLineCode(gen),
212         result_(result),
213         input_(input),
214 #if V8_ENABLE_WEBASSEMBLY
215         stub_mode_(stub_mode),
216 #endif  // V8_ENABLE_WEBASSEMBLY
217         unwinding_info_writer_(unwinding_info_writer),
218         isolate_(gen->isolate()),
219         zone_(gen->zone()) {
220   }
221 
Generate()222   void Generate() final {
223     __ AllocateStackSpace(kDoubleSize);
224     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
225                                                       kDoubleSize);
226     __ Movsd(MemOperand(rsp, 0), input_);
227 #if V8_ENABLE_WEBASSEMBLY
228     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
229       // A direct call to a wasm runtime stub defined in this module.
230       // Just encode the stub index. This will be patched when the code
231       // is added to the native module and copied into wasm code space.
232       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
233 #else
234     // For balance.
235     if (false) {
236 #endif  // V8_ENABLE_WEBASSEMBLY
237     } else if (tasm()->options().inline_offheap_trampolines) {
238       // With embedded builtins we do not need the isolate here. This allows
239       // the call to be generated asynchronously.
240       __ CallBuiltin(Builtin::kDoubleToI);
241     } else {
242       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
243     }
244     __ movl(result_, MemOperand(rsp, 0));
245     __ addq(rsp, Immediate(kDoubleSize));
246     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
247                                                       -kDoubleSize);
248   }
249 
250  private:
251   Register const result_;
252   XMMRegister const input_;
253 #if V8_ENABLE_WEBASSEMBLY
254   StubCallMode stub_mode_;
255 #endif  // V8_ENABLE_WEBASSEMBLY
256   UnwindingInfoWriter* const unwinding_info_writer_;
257   Isolate* isolate_;
258   Zone* zone_;
259 };
260 
261 class OutOfLineRecordWrite final : public OutOfLineCode {
262  public:
OutOfLineRecordWrite(CodeGenerator * gen,Register object,Operand operand,Register value,Register scratch0,Register scratch1,RecordWriteMode mode,StubCallMode stub_mode)263   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
264                        Register value, Register scratch0, Register scratch1,
265                        RecordWriteMode mode, StubCallMode stub_mode)
266       : OutOfLineCode(gen),
267         object_(object),
268         operand_(operand),
269         value_(value),
270         scratch0_(scratch0),
271         scratch1_(scratch1),
272         mode_(mode),
273 #if V8_ENABLE_WEBASSEMBLY
274         stub_mode_(stub_mode),
275 #endif  // V8_ENABLE_WEBASSEMBLY
276         zone_(gen->zone()) {
277     DCHECK(!AreAliased(object, scratch0, scratch1));
278     DCHECK(!AreAliased(value, scratch0, scratch1));
279   }
280 
Generate()281   void Generate() final {
282     if (COMPRESS_POINTERS_BOOL) {
283       __ DecompressTaggedPointer(value_, value_);
284     }
285     __ CheckPageFlag(value_, scratch0_,
286                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
287                      exit());
288     __ leaq(scratch1_, operand_);
289 
290     RememberedSetAction const remembered_set_action =
291         mode_ > RecordWriteMode::kValueIsMap ||
292                 FLAG_use_full_record_write_builtin
293             ? RememberedSetAction::kEmit
294             : RememberedSetAction::kOmit;
295     SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
296                                             ? SaveFPRegsMode::kSave
297                                             : SaveFPRegsMode::kIgnore;
298 
299     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
300       __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
301 #if V8_ENABLE_WEBASSEMBLY
302     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
303       // A direct call to a wasm runtime stub defined in this module.
304       // Just encode the stub index. This will be patched when the code
305       // is added to the native module and copied into wasm code space.
306       __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
307                                           remembered_set_action, save_fp_mode,
308                                           StubCallMode::kCallWasmRuntimeStub);
309 #endif  // V8_ENABLE_WEBASSEMBLY
310     } else {
311       __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
312                                           remembered_set_action, save_fp_mode);
313     }
314   }
315 
316  private:
317   Register const object_;
318   Operand const operand_;
319   Register const value_;
320   Register const scratch0_;
321   Register const scratch1_;
322   RecordWriteMode const mode_;
323 #if V8_ENABLE_WEBASSEMBLY
324   StubCallMode const stub_mode_;
325 #endif  // V8_ENABLE_WEBASSEMBLY
326   Zone* zone_;
327 };
328 
329 template <std::memory_order order>
EmitStore(TurboAssembler * tasm,Operand operand,Register value,MachineRepresentation rep)330 void EmitStore(TurboAssembler* tasm, Operand operand, Register value,
331                MachineRepresentation rep) {
332   if (order == std::memory_order_relaxed) {
333     switch (rep) {
334       case MachineRepresentation::kWord8:
335         tasm->movb(operand, value);
336         break;
337       case MachineRepresentation::kWord16:
338         tasm->movw(operand, value);
339         break;
340       case MachineRepresentation::kWord32:
341         tasm->movl(operand, value);
342         break;
343       case MachineRepresentation::kWord64:
344         tasm->movq(operand, value);
345         break;
346       case MachineRepresentation::kTagged:
347         tasm->StoreTaggedField(operand, value);
348         break;
349       case MachineRepresentation::kSandboxedPointer:
350         tasm->StoreSandboxedPointerField(operand, value);
351         break;
352       default:
353         UNREACHABLE();
354     }
355     return;
356   }
357 
358   DCHECK_EQ(order, std::memory_order_seq_cst);
359   switch (rep) {
360     case MachineRepresentation::kWord8:
361       tasm->movq(kScratchRegister, value);
362       tasm->xchgb(kScratchRegister, operand);
363       break;
364     case MachineRepresentation::kWord16:
365       tasm->movq(kScratchRegister, value);
366       tasm->xchgw(kScratchRegister, operand);
367       break;
368     case MachineRepresentation::kWord32:
369       tasm->movq(kScratchRegister, value);
370       tasm->xchgl(kScratchRegister, operand);
371       break;
372     case MachineRepresentation::kWord64:
373       tasm->movq(kScratchRegister, value);
374       tasm->xchgq(kScratchRegister, operand);
375       break;
376     case MachineRepresentation::kTagged:
377       tasm->AtomicStoreTaggedField(operand, value);
378       break;
379     default:
380       UNREACHABLE();
381   }
382 }
383 
384 template <std::memory_order order>
385 void EmitStore(TurboAssembler* tasm, Operand operand, Immediate value,
386                MachineRepresentation rep);
387 
388 template <>
EmitStore(TurboAssembler * tasm,Operand operand,Immediate value,MachineRepresentation rep)389 void EmitStore<std::memory_order_relaxed>(TurboAssembler* tasm, Operand operand,
390                                           Immediate value,
391                                           MachineRepresentation rep) {
392   switch (rep) {
393     case MachineRepresentation::kWord8:
394       tasm->movb(operand, value);
395       break;
396     case MachineRepresentation::kWord16:
397       tasm->movw(operand, value);
398       break;
399     case MachineRepresentation::kWord32:
400       tasm->movl(operand, value);
401       break;
402     case MachineRepresentation::kWord64:
403       tasm->movq(operand, value);
404       break;
405     case MachineRepresentation::kTagged:
406       tasm->StoreTaggedField(operand, value);
407       break;
408     default:
409       UNREACHABLE();
410   }
411 }
412 
413 #ifdef V8_IS_TSAN
EmitMemoryProbeForTrapHandlerIfNeeded(TurboAssembler * tasm,Register scratch,Operand operand,StubCallMode mode,int size)414 void EmitMemoryProbeForTrapHandlerIfNeeded(TurboAssembler* tasm,
415                                            Register scratch, Operand operand,
416                                            StubCallMode mode, int size) {
417 #if V8_ENABLE_WEBASSEMBLY && V8_TRAP_HANDLER_SUPPORTED
418   // The wasm OOB trap handler needs to be able to look up the faulting
419   // instruction pointer to handle the SIGSEGV raised by an OOB access. It
420   // will not handle SIGSEGVs raised by the TSAN store helpers. Emit a
421   // redundant load here to give the trap handler a chance to handle any
422   // OOB SIGSEGVs.
423   if (trap_handler::IsTrapHandlerEnabled() &&
424       mode == StubCallMode::kCallWasmRuntimeStub) {
425     switch (size) {
426       case kInt8Size:
427         tasm->movb(scratch, operand);
428         break;
429       case kInt16Size:
430         tasm->movw(scratch, operand);
431         break;
432       case kInt32Size:
433         tasm->movl(scratch, operand);
434         break;
435       case kInt64Size:
436         tasm->movq(scratch, operand);
437         break;
438       default:
439         UNREACHABLE();
440     }
441   }
442 #endif
443 }
444 
445 class OutOfLineTSANStore : public OutOfLineCode {
446  public:
OutOfLineTSANStore(CodeGenerator * gen,Operand operand,Register value,Register scratch0,StubCallMode stub_mode,int size,std::memory_order order)447   OutOfLineTSANStore(CodeGenerator* gen, Operand operand, Register value,
448                      Register scratch0, StubCallMode stub_mode, int size,
449                      std::memory_order order)
450       : OutOfLineCode(gen),
451         operand_(operand),
452         value_(value),
453         scratch0_(scratch0),
454 #if V8_ENABLE_WEBASSEMBLY
455         stub_mode_(stub_mode),
456 #endif  // V8_ENABLE_WEBASSEMBLY
457         size_(size),
458         memory_order_(order),
459         zone_(gen->zone()) {
460     DCHECK(!AreAliased(value, scratch0));
461   }
462 
Generate()463   void Generate() final {
464     const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
465                                             ? SaveFPRegsMode::kSave
466                                             : SaveFPRegsMode::kIgnore;
467     __ leaq(scratch0_, operand_);
468 
469 #if V8_ENABLE_WEBASSEMBLY
470     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
471       // A direct call to a wasm runtime stub defined in this module.
472       // Just encode the stub index. This will be patched when the code
473       // is added to the native module and copied into wasm code space.
474       tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
475                                 StubCallMode::kCallWasmRuntimeStub,
476                                 memory_order_);
477       return;
478     }
479 #endif  // V8_ENABLE_WEBASSEMBLY
480 
481     tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
482                               StubCallMode::kCallBuiltinPointer, memory_order_);
483   }
484 
485  private:
486   Operand const operand_;
487   Register const value_;
488   Register const scratch0_;
489 #if V8_ENABLE_WEBASSEMBLY
490   StubCallMode const stub_mode_;
491 #endif  // V8_ENABLE_WEBASSEMBLY
492   int size_;
493   const std::memory_order memory_order_;
494   Zone* zone_;
495 };
496 
EmitTSANStoreOOL(Zone * zone,CodeGenerator * codegen,TurboAssembler * tasm,Operand operand,Register value_reg,X64OperandConverter & i,StubCallMode mode,int size,std::memory_order order)497 void EmitTSANStoreOOL(Zone* zone, CodeGenerator* codegen, TurboAssembler* tasm,
498                       Operand operand, Register value_reg,
499                       X64OperandConverter& i, StubCallMode mode, int size,
500                       std::memory_order order) {
501   // The FOR_TESTING code doesn't initialize the root register. We can't call
502   // the TSAN builtin since we need to load the external reference through the
503   // root register.
504   // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
505   // path. It is not crucial, but it would be nice to remove this restriction.
506   DCHECK_NE(codegen->code_kind(), CodeKind::FOR_TESTING);
507 
508   Register scratch0 = i.TempRegister(0);
509   auto tsan_ool = zone->New<OutOfLineTSANStore>(codegen, operand, value_reg,
510                                                 scratch0, mode, size, order);
511   tasm->jmp(tsan_ool->entry());
512   tasm->bind(tsan_ool->exit());
513 }
514 
515 template <std::memory_order order>
GetTSANValueRegister(TurboAssembler * tasm,Register value,X64OperandConverter & i,MachineRepresentation rep)516 Register GetTSANValueRegister(TurboAssembler* tasm, Register value,
517                               X64OperandConverter& i,
518                               MachineRepresentation rep) {
519   if (rep == MachineRepresentation::kSandboxedPointer) {
520     // SandboxedPointers need to be encoded.
521     Register value_reg = i.TempRegister(1);
522     tasm->movq(value_reg, value);
523     tasm->EncodeSandboxedPointer(value_reg);
524     return value_reg;
525   }
526   return value;
527 }
528 
529 template <std::memory_order order>
530 Register GetTSANValueRegister(TurboAssembler* tasm, Immediate value,
531                               X64OperandConverter& i,
532                               MachineRepresentation rep);
533 
534 template <>
GetTSANValueRegister(TurboAssembler * tasm,Immediate value,X64OperandConverter & i,MachineRepresentation rep)535 Register GetTSANValueRegister<std::memory_order_relaxed>(
536     TurboAssembler* tasm, Immediate value, X64OperandConverter& i,
537     MachineRepresentation rep) {
538   Register value_reg = i.TempRegister(1);
539   tasm->movq(value_reg, value);
540   if (rep == MachineRepresentation::kSandboxedPointer) {
541     // SandboxedPointers need to be encoded.
542     tasm->EncodeSandboxedPointer(value_reg);
543   }
544   return value_reg;
545 }
546 
547 template <std::memory_order order, typename ValueT>
EmitTSANAwareStore(Zone * zone,CodeGenerator * codegen,TurboAssembler * tasm,Operand operand,ValueT value,X64OperandConverter & i,StubCallMode stub_call_mode,MachineRepresentation rep)548 void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
549                         TurboAssembler* tasm, Operand operand, ValueT value,
550                         X64OperandConverter& i, StubCallMode stub_call_mode,
551                         MachineRepresentation rep) {
552   // The FOR_TESTING code doesn't initialize the root register. We can't call
553   // the TSAN builtin since we need to load the external reference through the
554   // root register.
555   // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
556   // path. It is not crucial, but it would be nice to remove this restriction.
557   if (codegen->code_kind() != CodeKind::FOR_TESTING) {
558     int size = ElementSizeInBytes(rep);
559     EmitMemoryProbeForTrapHandlerIfNeeded(tasm, i.TempRegister(0), operand,
560                                           stub_call_mode, size);
561     Register value_reg = GetTSANValueRegister<order>(tasm, value, i, rep);
562     EmitTSANStoreOOL(zone, codegen, tasm, operand, value_reg, i, stub_call_mode,
563                      size, order);
564   } else {
565     EmitStore<order>(tasm, operand, value, rep);
566   }
567 }
568 
569 class OutOfLineTSANRelaxedLoad final : public OutOfLineCode {
570  public:
OutOfLineTSANRelaxedLoad(CodeGenerator * gen,Operand operand,Register scratch0,StubCallMode stub_mode,int size)571   OutOfLineTSANRelaxedLoad(CodeGenerator* gen, Operand operand,
572                            Register scratch0, StubCallMode stub_mode, int size)
573       : OutOfLineCode(gen),
574         operand_(operand),
575         scratch0_(scratch0),
576 #if V8_ENABLE_WEBASSEMBLY
577         stub_mode_(stub_mode),
578 #endif  // V8_ENABLE_WEBASSEMBLY
579         size_(size),
580         zone_(gen->zone()) {
581   }
582 
Generate()583   void Generate() final {
584     const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
585                                             ? SaveFPRegsMode::kSave
586                                             : SaveFPRegsMode::kIgnore;
587     __ leaq(scratch0_, operand_);
588 
589 #if V8_ENABLE_WEBASSEMBLY
590     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
591       // A direct call to a wasm runtime stub defined in this module.
592       // Just encode the stub index. This will be patched when the code
593       // is added to the native module and copied into wasm code space.
594       __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
595                                  StubCallMode::kCallWasmRuntimeStub);
596       return;
597     }
598 #endif  // V8_ENABLE_WEBASSEMBLY
599 
600     __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
601                                StubCallMode::kCallBuiltinPointer);
602   }
603 
604  private:
605   Operand const operand_;
606   Register const scratch0_;
607 #if V8_ENABLE_WEBASSEMBLY
608   StubCallMode const stub_mode_;
609 #endif  // V8_ENABLE_WEBASSEMBLY
610   int size_;
611   Zone* zone_;
612 };
613 
EmitTSANRelaxedLoadOOLIfNeeded(Zone * zone,CodeGenerator * codegen,TurboAssembler * tasm,Operand operand,X64OperandConverter & i,StubCallMode mode,int size)614 void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
615                                     TurboAssembler* tasm, Operand operand,
616                                     X64OperandConverter& i, StubCallMode mode,
617                                     int size) {
618   // The FOR_TESTING code doesn't initialize the root register. We can't call
619   // the TSAN builtin since we need to load the external reference through the
620   // root register.
621   // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
622   // path. It is not crucial, but it would be nice to remove this if.
623   if (codegen->code_kind() == CodeKind::FOR_TESTING) return;
624 
625   Register scratch0 = i.TempRegister(0);
626   auto tsan_ool = zone->New<OutOfLineTSANRelaxedLoad>(codegen, operand,
627                                                       scratch0, mode, size);
628   tasm->jmp(tsan_ool->entry());
629   tasm->bind(tsan_ool->exit());
630 }
631 
632 #else
633 template <std::memory_order order, typename ValueT>
EmitTSANAwareStore(Zone * zone,CodeGenerator * codegen,TurboAssembler * tasm,Operand operand,ValueT value,X64OperandConverter & i,StubCallMode stub_call_mode,MachineRepresentation rep)634 void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
635                         TurboAssembler* tasm, Operand operand, ValueT value,
636                         X64OperandConverter& i, StubCallMode stub_call_mode,
637                         MachineRepresentation rep) {
638   DCHECK(order == std::memory_order_relaxed ||
639          order == std::memory_order_seq_cst);
640   EmitStore<order>(tasm, operand, value, rep);
641 }
642 
EmitTSANRelaxedLoadOOLIfNeeded(Zone * zone,CodeGenerator * codegen,TurboAssembler * tasm,Operand operand,X64OperandConverter & i,StubCallMode mode,int size)643 void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
644                                     TurboAssembler* tasm, Operand operand,
645                                     X64OperandConverter& i, StubCallMode mode,
646                                     int size) {}
647 #endif  // V8_IS_TSAN
648 
649 #if V8_ENABLE_WEBASSEMBLY
650 class WasmOutOfLineTrap : public OutOfLineCode {
651  public:
WasmOutOfLineTrap(CodeGenerator * gen,Instruction * instr)652   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
653       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
654 
Generate()655   void Generate() override {
656     X64OperandConverter i(gen_, instr_);
657     TrapId trap_id =
658         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
659     GenerateWithTrapId(trap_id);
660   }
661 
662  protected:
663   CodeGenerator* gen_;
664 
GenerateWithTrapId(TrapId trap_id)665   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
666 
667  private:
GenerateCallToTrap(TrapId trap_id)668   void GenerateCallToTrap(TrapId trap_id) {
669     if (!gen_->wasm_runtime_exception_support()) {
670       // We cannot test calls to the runtime in cctest/test-run-wasm.
671       // Therefore we emit a call to C here instead of a call to the runtime.
672       __ PrepareCallCFunction(0);
673       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
674                        0);
675       __ LeaveFrame(StackFrame::WASM);
676       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
677       size_t pop_size =
678           call_descriptor->ParameterSlotCount() * kSystemPointerSize;
679       // Use rcx as a scratch register, we return anyways immediately.
680       __ Ret(static_cast<int>(pop_size), rcx);
681     } else {
682       gen_->AssembleSourcePosition(instr_);
683       // A direct call to a wasm runtime stub defined in this module.
684       // Just encode the stub index. This will be patched when the code
685       // is added to the native module and copied into wasm code space.
686       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
687       ReferenceMap* reference_map =
688           gen_->zone()->New<ReferenceMap>(gen_->zone());
689       gen_->RecordSafepoint(reference_map);
690       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
691     }
692   }
693 
694   Instruction* instr_;
695 };
696 
697 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
698  public:
WasmProtectedInstructionTrap(CodeGenerator * gen,int pc,Instruction * instr)699   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
700       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
701 
Generate()702   void Generate() final {
703     DCHECK(FLAG_wasm_bounds_checks && !FLAG_wasm_enforce_bounds_checks);
704     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
705     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
706   }
707 
708  private:
709   int pc_;
710 };
711 
EmitOOLTrapIfNeeded(Zone * zone,CodeGenerator * codegen,InstructionCode opcode,Instruction * instr,int pc)712 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
713                          InstructionCode opcode, Instruction* instr,
714                          int pc) {
715   const MemoryAccessMode access_mode = instr->memory_access_mode();
716   if (access_mode == kMemoryAccessProtected) {
717     zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
718   }
719 }
720 
721 #else
722 
EmitOOLTrapIfNeeded(Zone * zone,CodeGenerator * codegen,InstructionCode opcode,Instruction * instr,int pc)723 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
724                          InstructionCode opcode, Instruction* instr, int pc) {
725   DCHECK_NE(kMemoryAccessProtected, instr->memory_access_mode());
726 }
727 
728 #endif  // V8_ENABLE_WEBASSEMBLY
729 
730 }  // namespace
731 
732 #define ASSEMBLE_UNOP(asm_instr)         \
733   do {                                   \
734     if (instr->Output()->IsRegister()) { \
735       __ asm_instr(i.OutputRegister());  \
736     } else {                             \
737       __ asm_instr(i.OutputOperand());   \
738     }                                    \
739   } while (false)
740 
741 #define ASSEMBLE_BINOP(asm_instr)                                \
742   do {                                                           \
743     if (HasAddressingMode(instr)) {                              \
744       size_t index = 1;                                          \
745       Operand right = i.MemoryOperand(&index);                   \
746       __ asm_instr(i.InputRegister(0), right);                   \
747     } else {                                                     \
748       if (HasImmediateInput(instr, 1)) {                         \
749         if (HasRegisterInput(instr, 0)) {                        \
750           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
751         } else {                                                 \
752           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
753         }                                                        \
754       } else {                                                   \
755         if (HasRegisterInput(instr, 1)) {                        \
756           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
757         } else {                                                 \
758           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
759         }                                                        \
760       }                                                          \
761     }                                                            \
762   } while (false)
763 
764 #define ASSEMBLE_COMPARE(asm_instr)                              \
765   do {                                                           \
766     if (HasAddressingMode(instr)) {                              \
767       size_t index = 0;                                          \
768       Operand left = i.MemoryOperand(&index);                    \
769       if (HasImmediateInput(instr, index)) {                     \
770         __ asm_instr(left, i.InputImmediate(index));             \
771       } else {                                                   \
772         __ asm_instr(left, i.InputRegister(index));              \
773       }                                                          \
774     } else {                                                     \
775       if (HasImmediateInput(instr, 1)) {                         \
776         if (HasRegisterInput(instr, 0)) {                        \
777           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
778         } else {                                                 \
779           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
780         }                                                        \
781       } else {                                                   \
782         if (HasRegisterInput(instr, 1)) {                        \
783           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
784         } else {                                                 \
785           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
786         }                                                        \
787       }                                                          \
788     }                                                            \
789   } while (false)
790 
791 #define ASSEMBLE_MULT(asm_instr)                              \
792   do {                                                        \
793     if (HasImmediateInput(instr, 1)) {                        \
794       if (HasRegisterInput(instr, 0)) {                       \
795         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
796                      i.InputImmediate(1));                    \
797       } else {                                                \
798         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
799                      i.InputImmediate(1));                    \
800       }                                                       \
801     } else {                                                  \
802       if (HasRegisterInput(instr, 1)) {                       \
803         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
804       } else {                                                \
805         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
806       }                                                       \
807     }                                                         \
808   } while (false)
809 
810 #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
811   do {                                                                     \
812     if (HasImmediateInput(instr, 1)) {                                     \
813       if (instr->Output()->IsRegister()) {                                 \
814         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
815       } else {                                                             \
816         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
817       }                                                                    \
818     } else {                                                               \
819       if (instr->Output()->IsRegister()) {                                 \
820         __ asm_instr##_cl(i.OutputRegister());                             \
821       } else {                                                             \
822         __ asm_instr##_cl(i.OutputOperand());                              \
823       }                                                                    \
824     }                                                                      \
825   } while (false)
826 
827 #define ASSEMBLE_MOVX(asm_instr)                            \
828   do {                                                      \
829     if (HasAddressingMode(instr)) {                         \
830       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
831     } else if (HasRegisterInput(instr, 0)) {                \
832       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
833     } else {                                                \
834       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
835     }                                                       \
836   } while (false)
837 
838 #define ASSEMBLE_SSE_BINOP(asm_instr)                                     \
839   do {                                                                    \
840     if (HasAddressingMode(instr)) {                                       \
841       size_t index = 1;                                                   \
842       Operand right = i.MemoryOperand(&index);                            \
843       __ asm_instr(i.InputDoubleRegister(0), right);                      \
844     } else {                                                              \
845       if (instr->InputAt(1)->IsFPRegister()) {                            \
846         __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
847       } else {                                                            \
848         __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
849       }                                                                   \
850     }                                                                     \
851   } while (false)
852 
853 #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
854   do {                                                                  \
855     if (instr->InputAt(0)->IsFPRegister()) {                            \
856       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
857     } else {                                                            \
858       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
859     }                                                                   \
860   } while (false)
861 
862 #define ASSEMBLE_AVX_BINOP(asm_instr)                                          \
863   do {                                                                         \
864     CpuFeatureScope avx_scope(tasm(), AVX);                                    \
865     if (HasAddressingMode(instr)) {                                            \
866       size_t index = 1;                                                        \
867       Operand right = i.MemoryOperand(&index);                                 \
868       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
869     } else {                                                                   \
870       if (instr->InputAt(1)->IsFPRegister()) {                                 \
871         __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
872                      i.InputDoubleRegister(1));                                \
873       } else {                                                                 \
874         __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
875                      i.InputOperand(1));                                       \
876       }                                                                        \
877     }                                                                          \
878   } while (false)
879 
880 #define ASSEMBLE_IEEE754_BINOP(name)                                     \
881   do {                                                                   \
882     __ PrepareCallCFunction(2);                                          \
883     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
884   } while (false)
885 
886 #define ASSEMBLE_IEEE754_UNOP(name)                                      \
887   do {                                                                   \
888     __ PrepareCallCFunction(1);                                          \
889     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
890   } while (false)
891 
892 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
893   do {                                                          \
894     Label binop;                                                \
895     __ bind(&binop);                                            \
896     __ mov_inst(rax, i.MemoryOperand(1));                       \
897     __ movl(i.TempRegister(0), rax);                            \
898     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
899     __ lock();                                                  \
900     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
901     __ j(not_equal, &binop);                                    \
902   } while (false)
903 
904 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
905   do {                                                            \
906     Label binop;                                                  \
907     __ bind(&binop);                                              \
908     __ mov_inst(rax, i.MemoryOperand(1));                         \
909     __ movq(i.TempRegister(0), rax);                              \
910     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
911     __ lock();                                                    \
912     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
913     __ j(not_equal, &binop);                                      \
914   } while (false)
915 
916 // Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
917 // dst and first src will be the same. For AVX we don't restrict it that way, so
918 // we will omit unnecessary moves.
919 #define ASSEMBLE_SIMD_BINOP(opcode)                                      \
920   do {                                                                   \
921     if (CpuFeatures::IsSupported(AVX)) {                                 \
922       CpuFeatureScope avx_scope(tasm(), AVX);                            \
923       __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
924                    i.InputSimd128Register(1));                           \
925     } else {                                                             \
926       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
927       __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1));   \
928     }                                                                    \
929   } while (false)
930 
931 #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
932   do {                                                       \
933     if (instr->InputAt(index)->IsSimd128Register()) {        \
934       __ opcode(dst_operand, i.InputSimd128Register(index)); \
935     } else {                                                 \
936       __ opcode(dst_operand, i.InputOperand(index));         \
937     }                                                        \
938   } while (false)
939 
940 #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
941   do {                                                            \
942     if (instr->InputAt(index)->IsSimd128Register()) {             \
943       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
944     } else {                                                      \
945       __ opcode(dst_operand, i.InputOperand(index), imm);         \
946     }                                                             \
947   } while (false)
948 
949 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                    \
950   do {                                                          \
951     XMMRegister dst = i.OutputSimd128Register();                \
952     byte input_index = instr->InputCount() == 2 ? 1 : 0;        \
953     if (CpuFeatures::IsSupported(AVX)) {                        \
954       CpuFeatureScope avx_scope(tasm(), AVX);                   \
955       DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
956       __ v##opcode(dst, i.InputSimd128Register(0),              \
957                    i.InputSimd128Register(input_index));        \
958     } else {                                                    \
959       DCHECK_EQ(dst, i.InputSimd128Register(0));                \
960       ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);            \
961     }                                                           \
962   } while (false)
963 
964 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                \
965   do {                                                        \
966     XMMRegister dst = i.OutputSimd128Register();              \
967     XMMRegister src = i.InputSimd128Register(0);              \
968     if (CpuFeatures::IsSupported(AVX)) {                      \
969       CpuFeatureScope avx_scope(tasm(), AVX);                 \
970       DCHECK(instr->InputAt(1)->IsSimd128Register());         \
971       __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
972     } else {                                                  \
973       DCHECK_EQ(dst, src);                                    \
974       if (instr->InputAt(1)->IsSimd128Register()) {           \
975         __ opcode(dst, i.InputSimd128Register(1), imm);       \
976       } else {                                                \
977         __ opcode(dst, i.InputOperand(1), imm);               \
978       }                                                       \
979     }                                                         \
980   } while (false)
981 
982 #define ASSEMBLE_SIMD_ALL_TRUE(opcode)                       \
983   do {                                                       \
984     Register dst = i.OutputRegister();                       \
985     __ xorq(dst, dst);                                       \
986     __ Pxor(kScratchDoubleReg, kScratchDoubleReg);           \
987     __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \
988     __ Ptest(kScratchDoubleReg, kScratchDoubleReg);          \
989     __ setcc(equal, dst);                                    \
990   } while (false)
991 
992 // This macro will directly emit the opcode if the shift is an immediate - the
993 // shift value will be taken modulo 2^width. Otherwise, it will emit code to
994 // perform the modulus operation.
995 #define ASSEMBLE_SIMD_SHIFT(opcode, width)                               \
996   do {                                                                   \
997     XMMRegister dst = i.OutputSimd128Register();                         \
998     if (HasImmediateInput(instr, 1)) {                                   \
999       if (CpuFeatures::IsSupported(AVX)) {                               \
1000         CpuFeatureScope avx_scope(tasm(), AVX);                          \
1001         __ v##opcode(dst, i.InputSimd128Register(0),                     \
1002                      byte{i.InputInt##width(1)});                        \
1003       } else {                                                           \
1004         DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
1005         __ opcode(dst, byte{i.InputInt##width(1)});                      \
1006       }                                                                  \
1007     } else {                                                             \
1008       constexpr int mask = (1 << width) - 1;                             \
1009       __ movq(kScratchRegister, i.InputRegister(1));                     \
1010       __ andq(kScratchRegister, Immediate(mask));                        \
1011       __ Movq(kScratchDoubleReg, kScratchRegister);                      \
1012       if (CpuFeatures::IsSupported(AVX)) {                               \
1013         CpuFeatureScope avx_scope(tasm(), AVX);                          \
1014         __ v##opcode(dst, i.InputSimd128Register(0), kScratchDoubleReg); \
1015       } else {                                                           \
1016         DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
1017         __ opcode(dst, kScratchDoubleReg);                               \
1018       }                                                                  \
1019     }                                                                    \
1020   } while (false)
1021 
1022 #define ASSEMBLE_PINSR(ASM_INSTR)                                        \
1023   do {                                                                   \
1024     XMMRegister dst = i.OutputSimd128Register();                         \
1025     XMMRegister src = i.InputSimd128Register(0);                         \
1026     uint8_t laneidx = i.InputUint8(1);                                   \
1027     uint32_t load_offset;                                                \
1028     if (HasAddressingMode(instr)) {                                      \
1029       __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx, &load_offset); \
1030     } else if (instr->InputAt(2)->IsFPRegister()) {                      \
1031       __ Movq(kScratchRegister, i.InputDoubleRegister(2));               \
1032       __ ASM_INSTR(dst, src, kScratchRegister, laneidx, &load_offset);   \
1033     } else if (instr->InputAt(2)->IsRegister()) {                        \
1034       __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx, &load_offset); \
1035     } else {                                                             \
1036       __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx, &load_offset);  \
1037     }                                                                    \
1038     EmitOOLTrapIfNeeded(zone(), this, opcode, instr, load_offset);       \
1039   } while (false)
1040 
1041 #define ASSEMBLE_SEQ_CST_STORE(rep)                                       \
1042   do {                                                                    \
1043     Register value = i.InputRegister(0);                                  \
1044     Operand operand = i.MemoryOperand(1);                                 \
1045     EmitTSANAwareStore<std::memory_order_seq_cst>(                        \
1046         zone(), this, tasm(), operand, value, i, DetermineStubCallMode(), \
1047         rep);                                                             \
1048   } while (false)
1049 
AssembleDeconstructFrame()1050 void CodeGenerator::AssembleDeconstructFrame() {
1051   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
1052   __ movq(rsp, rbp);
1053   __ popq(rbp);
1054 }
1055 
AssemblePrepareTailCall()1056 void CodeGenerator::AssemblePrepareTailCall() {
1057   if (frame_access_state()->has_frame()) {
1058     __ movq(rbp, MemOperand(rbp, 0));
1059   }
1060   frame_access_state()->SetFrameAccessToSP();
1061 }
1062 
1063 namespace {
1064 
AdjustStackPointerForTailCall(Instruction * instr,TurboAssembler * assembler,Linkage * linkage,OptimizedCompilationInfo * info,FrameAccessState * state,int new_slot_above_sp,bool allow_shrinkage=true)1065 void AdjustStackPointerForTailCall(Instruction* instr,
1066                                    TurboAssembler* assembler, Linkage* linkage,
1067                                    OptimizedCompilationInfo* info,
1068                                    FrameAccessState* state,
1069                                    int new_slot_above_sp,
1070                                    bool allow_shrinkage = true) {
1071   int stack_slot_delta;
1072   if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
1073     // For this special tail-call mode, the callee has the same arguments and
1074     // linkage as the caller, and arguments adapter frames must be preserved.
1075     // Thus we simply have reset the stack pointer register to its original
1076     // value before frame construction.
1077     // See also: AssembleConstructFrame.
1078     DCHECK(!info->is_osr());
1079     DCHECK(linkage->GetIncomingDescriptor()->CalleeSavedRegisters().is_empty());
1080     DCHECK(
1081         linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters().is_empty());
1082     DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
1083     stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
1084                         kReturnAddressStackSlotCount) *
1085                        -1;
1086     DCHECK_LE(stack_slot_delta, 0);
1087   } else {
1088     int current_sp_offset = state->GetSPToFPSlotCount() +
1089                             StandardFrameConstants::kFixedSlotCountAboveFp;
1090     stack_slot_delta = new_slot_above_sp - current_sp_offset;
1091   }
1092 
1093   if (stack_slot_delta > 0) {
1094     assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
1095     state->IncreaseSPDelta(stack_slot_delta);
1096   } else if (allow_shrinkage && stack_slot_delta < 0) {
1097     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
1098     state->IncreaseSPDelta(stack_slot_delta);
1099   }
1100 }
1101 
SetupSimdImmediateInRegister(TurboAssembler * assembler,uint32_t * imms,XMMRegister reg)1102 void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
1103                                   XMMRegister reg) {
1104   assembler->Move(reg, make_uint64(imms[3], imms[2]),
1105                   make_uint64(imms[1], imms[0]));
1106 }
1107 
1108 }  // namespace
1109 
AssembleTailCallBeforeGap(Instruction * instr,int first_unused_slot_offset)1110 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
1111                                               int first_unused_slot_offset) {
1112   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
1113   ZoneVector<MoveOperands*> pushes(zone());
1114   GetPushCompatibleMoves(instr, flags, &pushes);
1115 
1116   if (!pushes.empty() &&
1117       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
1118        first_unused_slot_offset)) {
1119     DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
1120     X64OperandConverter g(this, instr);
1121     for (auto move : pushes) {
1122       LocationOperand destination_location(
1123           LocationOperand::cast(move->destination()));
1124       InstructionOperand source(move->source());
1125       AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1126                                     frame_access_state(),
1127                                     destination_location.index());
1128       if (source.IsStackSlot()) {
1129         LocationOperand source_location(LocationOperand::cast(source));
1130         __ Push(g.SlotToOperand(source_location.index()));
1131       } else if (source.IsRegister()) {
1132         LocationOperand source_location(LocationOperand::cast(source));
1133         __ Push(source_location.GetRegister());
1134       } else if (source.IsImmediate()) {
1135         __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
1136       } else {
1137         // Pushes of non-scalar data types are not supported.
1138         UNIMPLEMENTED();
1139       }
1140       frame_access_state()->IncreaseSPDelta(1);
1141       move->Eliminate();
1142     }
1143   }
1144   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1145                                 frame_access_state(), first_unused_slot_offset,
1146                                 false);
1147 }
1148 
1149 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
1150                                              int first_unused_slot_offset) {
1151   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1152                                 frame_access_state(), first_unused_slot_offset);
1153 }
1154 
1155 // Check that {kJavaScriptCallCodeStartRegister} is correct.
1156 void CodeGenerator::AssembleCodeStartRegisterCheck() {
1157   __ ComputeCodeStartAddress(rbx);
1158   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
1159   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
1160 }
1161 
1162 void CodeGenerator::BailoutIfDeoptimized() { __ BailoutIfDeoptimized(rbx); }
1163 
1164 bool ShouldClearOutputRegisterBeforeInstruction(CodeGenerator* g,
1165                                                 Instruction* instr) {
1166   X64OperandConverter i(g, instr);
1167   FlagsMode mode = FlagsModeField::decode(instr->opcode());
1168   if (mode == kFlags_set) {
1169     FlagsCondition condition = FlagsConditionField::decode(instr->opcode());
1170     if (condition != kUnorderedEqual && condition != kUnorderedNotEqual) {
1171       Register reg = i.OutputRegister(instr->OutputCount() - 1);
1172       // Do not clear the output register when it is also an input register.
1173       for (size_t index = 0; index < instr->InputCount(); ++index) {
1174         if (HasRegisterInput(instr, index) && reg == i.InputRegister(index))
1175           return false;
1176       }
1177       return true;
1178     }
1179   }
1180   return false;
1181 }
1182 
1183 // Assembles an instruction after register allocation, producing machine code.
1184 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
1185     Instruction* instr) {
1186   X64OperandConverter i(this, instr);
1187   InstructionCode opcode = instr->opcode();
1188   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
1189   if (ShouldClearOutputRegisterBeforeInstruction(this, instr)) {
1190     // Transform setcc + movzxbl into xorl + setcc to avoid register stall and
1191     // encode one byte shorter.
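         // For example, instead of "cmpl ...; setcc al; movzxbl eax, al" this
         // emits "xorl eax, eax; cmpl ...; setcc al" (register choice
         // illustrative).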
1192     Register reg = i.OutputRegister(instr->OutputCount() - 1);
1193     __ xorl(reg, reg);
1194   }
1195   switch (arch_opcode) {
1196     case kArchCallCodeObject: {
1197       if (HasImmediateInput(instr, 0)) {
1198         Handle<CodeT> code = i.InputCode(0);
1199         __ Call(code, RelocInfo::CODE_TARGET);
1200       } else {
1201         Register reg = i.InputRegister(0);
1202         DCHECK_IMPLIES(
1203             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1204             reg == kJavaScriptCallCodeStartRegister);
1205         __ LoadCodeObjectEntry(reg, reg);
1206         __ call(reg);
1207       }
1208       RecordCallPosition(instr);
1209       frame_access_state()->ClearSPDelta();
1210       break;
1211     }
1212     case kArchCallBuiltinPointer: {
1213       DCHECK(!HasImmediateInput(instr, 0));
1214       Register builtin_index = i.InputRegister(0);
1215       __ CallBuiltinByIndex(builtin_index);
1216       RecordCallPosition(instr);
1217       frame_access_state()->ClearSPDelta();
1218       break;
1219     }
1220 #if V8_ENABLE_WEBASSEMBLY
1221     case kArchCallWasmFunction: {
1222       if (HasImmediateInput(instr, 0)) {
1223         Constant constant = i.ToConstant(instr->InputAt(0));
1224         Address wasm_code = static_cast<Address>(constant.ToInt64());
1225         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1226           __ near_call(wasm_code, constant.rmode());
1227         } else {
1228           __ Call(wasm_code, constant.rmode());
1229         }
1230       } else {
1231         __ call(i.InputRegister(0));
1232       }
1233       RecordCallPosition(instr);
1234       frame_access_state()->ClearSPDelta();
1235       break;
1236     }
1237     case kArchTailCallWasm: {
1238       if (HasImmediateInput(instr, 0)) {
1239         Constant constant = i.ToConstant(instr->InputAt(0));
1240         Address wasm_code = static_cast<Address>(constant.ToInt64());
1241         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1242           __ near_jmp(wasm_code, constant.rmode());
1243         } else {
1244           __ Move(kScratchRegister, wasm_code, constant.rmode());
1245           __ jmp(kScratchRegister);
1246         }
1247       } else {
1248         __ jmp(i.InputRegister(0));
1249       }
1250       unwinding_info_writer_.MarkBlockWillExit();
1251       frame_access_state()->ClearSPDelta();
1252       frame_access_state()->SetFrameAccessToDefault();
1253       break;
1254     }
1255 #endif  // V8_ENABLE_WEBASSEMBLY
1256     case kArchTailCallCodeObject: {
1257       if (HasImmediateInput(instr, 0)) {
1258         Handle<CodeT> code = i.InputCode(0);
1259         __ Jump(code, RelocInfo::CODE_TARGET);
1260       } else {
1261         Register reg = i.InputRegister(0);
1262         DCHECK_IMPLIES(
1263             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1264             reg == kJavaScriptCallCodeStartRegister);
1265         __ LoadCodeObjectEntry(reg, reg);
1266         __ jmp(reg);
1267       }
1268       unwinding_info_writer_.MarkBlockWillExit();
1269       frame_access_state()->ClearSPDelta();
1270       frame_access_state()->SetFrameAccessToDefault();
1271       break;
1272     }
1273     case kArchTailCallAddress: {
1274       CHECK(!HasImmediateInput(instr, 0));
1275       Register reg = i.InputRegister(0);
1276       DCHECK_IMPLIES(
1277           instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1278           reg == kJavaScriptCallCodeStartRegister);
1279       __ jmp(reg);
1280       unwinding_info_writer_.MarkBlockWillExit();
1281       frame_access_state()->ClearSPDelta();
1282       frame_access_state()->SetFrameAccessToDefault();
1283       break;
1284     }
1285     case kArchCallJSFunction: {
1286       Register func = i.InputRegister(0);
1287       if (FLAG_debug_code) {
1288         // Check the function's context matches the context argument.
1289         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
1290         __ Assert(equal, AbortReason::kWrongFunctionContext);
1291       }
1292       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
1293       __ LoadTaggedPointerField(rcx,
1294                                 FieldOperand(func, JSFunction::kCodeOffset));
1295       __ CallCodeTObject(rcx);
1296       frame_access_state()->ClearSPDelta();
1297       RecordCallPosition(instr);
1298       break;
1299     }
1300     case kArchPrepareCallCFunction: {
1301       // Frame alignment requires using FP-relative frame addressing.
1302       frame_access_state()->SetFrameAccessToFP();
1303       int const num_parameters = MiscField::decode(instr->opcode());
1304       __ PrepareCallCFunction(num_parameters);
1305       break;
1306     }
1307     case kArchSaveCallerRegisters: {
1308       fp_mode_ =
1309           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1310       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1311              fp_mode_ == SaveFPRegsMode::kSave);
1312       // kReturnRegister0 should have been saved before entering the stub.
1313       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1314       DCHECK(IsAligned(bytes, kSystemPointerSize));
1315       DCHECK_EQ(0, frame_access_state()->sp_delta());
1316       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1317       DCHECK(!caller_registers_saved_);
1318       caller_registers_saved_ = true;
1319       break;
1320     }
1321     case kArchRestoreCallerRegisters: {
1322       DCHECK(fp_mode_ ==
1323              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1324       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1325              fp_mode_ == SaveFPRegsMode::kSave);
1326       // Don't overwrite the returned value.
1327       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1328       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
1329       DCHECK_EQ(0, frame_access_state()->sp_delta());
1330       DCHECK(caller_registers_saved_);
1331       caller_registers_saved_ = false;
1332       break;
1333     }
1334     case kArchPrepareTailCall:
1335       AssemblePrepareTailCall();
1336       break;
1337     case kArchCallCFunction: {
1338       int const num_gp_parameters = ParamField::decode(instr->opcode());
1339       int const num_fp_parameters = FPParamField::decode(instr->opcode());
1340       Label return_location;
1341 #if V8_ENABLE_WEBASSEMBLY
1342       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1343         // Put the return address in a stack slot.
1344         __ leaq(kScratchRegister, Operand(&return_location, 0));
1345         __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1346                 kScratchRegister);
1347       }
1348 #endif  // V8_ENABLE_WEBASSEMBLY
1349       if (HasImmediateInput(instr, 0)) {
1350         ExternalReference ref = i.InputExternalReference(0);
1351         __ CallCFunction(ref, num_gp_parameters + num_fp_parameters);
1352       } else {
1353         Register func = i.InputRegister(0);
1354         __ CallCFunction(func, num_gp_parameters + num_fp_parameters);
1355       }
1356       __ bind(&return_location);
1357 #if V8_ENABLE_WEBASSEMBLY
1358       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1359         RecordSafepoint(instr->reference_map());
1360       }
1361 #endif  // V8_ENABLE_WEBASSEMBLY
1362       frame_access_state()->SetFrameAccessToDefault();
1363       // Ideally, we should decrement SP delta to match the change of stack
1364       // pointer in CallCFunction. However, for certain architectures (e.g.
1365       // ARM), there may be a stricter alignment requirement, causing the old
1366       // SP to be saved on the stack. In those cases, we cannot calculate the SP
1367       // delta statically.
1368       frame_access_state()->ClearSPDelta();
1369       if (caller_registers_saved_) {
1370         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1371         // Here, we assume the sequence to be:
1372         //   kArchSaveCallerRegisters;
1373         //   kArchCallCFunction;
1374         //   kArchRestoreCallerRegisters;
1375         int bytes =
1376             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1377         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1378       }
1379       // TODO(turbofan): Do we need an lfence here?
1380       break;
1381     }
1382     case kArchJmp:
1383       AssembleArchJump(i.InputRpo(0));
1384       break;
1385     case kArchBinarySearchSwitch:
1386       AssembleArchBinarySearchSwitch(instr);
1387       break;
1388     case kArchTableSwitch:
1389       AssembleArchTableSwitch(instr);
1390       break;
1391     case kArchComment:
1392       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1393       break;
1394     case kArchAbortCSADcheck:
1395       DCHECK(i.InputRegister(0) == rdx);
1396       {
1397         // We don't actually want to generate a pile of code for this, so just
1398         // claim there is a stack frame, without generating one.
1399         FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
1400         __ Call(BUILTIN_CODE(isolate(), AbortCSADcheck),
1401                 RelocInfo::CODE_TARGET);
1402       }
1403       __ int3();
1404       unwinding_info_writer_.MarkBlockWillExit();
1405       break;
1406     case kArchDebugBreak:
1407       __ DebugBreak();
1408       break;
1409     case kArchThrowTerminator:
1410       unwinding_info_writer_.MarkBlockWillExit();
1411       break;
1412     case kArchNop:
1413       // don't emit code for nops.
1414       break;
1415     case kArchDeoptimize: {
1416       DeoptimizationExit* exit =
1417           BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
1418       __ jmp(exit->label());
1419       break;
1420     }
1421     case kArchRet:
1422       AssembleReturn(instr->InputAt(0));
1423       break;
1424     case kArchFramePointer:
1425       __ movq(i.OutputRegister(), rbp);
1426       break;
1427     case kArchParentFramePointer:
1428       if (frame_access_state()->has_frame()) {
1429         __ movq(i.OutputRegister(), Operand(rbp, 0));
1430       } else {
1431         __ movq(i.OutputRegister(), rbp);
1432       }
1433       break;
1434     case kArchStackPointerGreaterThan: {
1435       // Potentially apply an offset to the current stack pointer before the
1436       // comparison to consider the size difference of an optimized frame versus
1437       // the contained unoptimized frames.
1438 
1439       Register lhs_register = rsp;
1440       uint32_t offset;
1441 
1442       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1443         lhs_register = kScratchRegister;
1444         __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1445       }
1446 
1447       constexpr size_t kValueIndex = 0;
1448       if (HasAddressingMode(instr)) {
1449         __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1450       } else {
1451         __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1452       }
1453       break;
1454     }
1455     case kArchStackCheckOffset:
1456       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1457       break;
1458     case kArchTruncateDoubleToI: {
1459       auto result = i.OutputRegister();
1460       auto input = i.InputDoubleRegister(0);
1461       auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1462           this, result, input, DetermineStubCallMode(),
1463           &unwinding_info_writer_);
1464       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1465       // use of Cvttsd2siq requires the movl below to avoid sign extension.
1466       __ Cvttsd2siq(result, input);
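           // On overflow (or NaN) Cvttsd2siq yields INT64_MIN; "result - 1"
           // then overflows (and only then), so the overflow flag selects the
           // out-of-line slow path.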
1467       __ cmpq(result, Immediate(1));
1468       __ j(overflow, ool->entry());
1469       __ bind(ool->exit());
1470       __ movl(result, result);
1471       break;
1472     }
1473     case kArchStoreWithWriteBarrier:  // Fall through.
1474     case kArchAtomicStoreWithWriteBarrier: {
1475       RecordWriteMode mode =
1476           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1477       Register object = i.InputRegister(0);
1478       size_t index = 0;
1479       Operand operand = i.MemoryOperand(&index);
1480       Register value = i.InputRegister(index);
1481       Register scratch0 = i.TempRegister(0);
1482       Register scratch1 = i.TempRegister(1);
1483 
1484       if (FLAG_debug_code) {
1485         // Checking that |value| is not a cleared weakref: our write barrier
1486         // does not support that for now.
1487         __ Cmp(value, kClearedWeakHeapObjectLower32);
1488         __ Check(not_equal, AbortReason::kOperandIsCleared);
1489       }
1490 
1491       auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1492                                                    scratch0, scratch1, mode,
1493                                                    DetermineStubCallMode());
1494       if (arch_opcode == kArchStoreWithWriteBarrier) {
1495         EmitTSANAwareStore<std::memory_order_relaxed>(
1496             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1497             MachineRepresentation::kTagged);
1498       } else {
1499         DCHECK_EQ(arch_opcode, kArchAtomicStoreWithWriteBarrier);
1500         EmitTSANAwareStore<std::memory_order_seq_cst>(
1501             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1502             MachineRepresentation::kTagged);
1503       }
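           // The store above always happens; the record-write stub below only
           // runs when the value is a heap object (Smis are skipped where the
           // mode allows it) and the object's page has the "pointers from here
           // are interesting" flag set.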
1504       if (mode > RecordWriteMode::kValueIsPointer) {
1505         __ JumpIfSmi(value, ool->exit());
1506       }
1507       __ CheckPageFlag(object, scratch0,
1508                        MemoryChunk::kPointersFromHereAreInterestingMask,
1509                        not_zero, ool->entry());
1510       __ bind(ool->exit());
1511       break;
1512     }
1513     case kX64MFence:
1514       __ mfence();
1515       break;
1516     case kX64LFence:
1517       __ lfence();
1518       break;
1519     case kArchStackSlot: {
1520       FrameOffset offset =
1521           frame_access_state()->GetFrameOffset(i.InputInt32(0));
1522       Register base = offset.from_stack_pointer() ? rsp : rbp;
1523       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1524       break;
1525     }
1526     case kIeee754Float64Acos:
1527       ASSEMBLE_IEEE754_UNOP(acos);
1528       break;
1529     case kIeee754Float64Acosh:
1530       ASSEMBLE_IEEE754_UNOP(acosh);
1531       break;
1532     case kIeee754Float64Asin:
1533       ASSEMBLE_IEEE754_UNOP(asin);
1534       break;
1535     case kIeee754Float64Asinh:
1536       ASSEMBLE_IEEE754_UNOP(asinh);
1537       break;
1538     case kIeee754Float64Atan:
1539       ASSEMBLE_IEEE754_UNOP(atan);
1540       break;
1541     case kIeee754Float64Atanh:
1542       ASSEMBLE_IEEE754_UNOP(atanh);
1543       break;
1544     case kIeee754Float64Atan2:
1545       ASSEMBLE_IEEE754_BINOP(atan2);
1546       break;
1547     case kIeee754Float64Cbrt:
1548       ASSEMBLE_IEEE754_UNOP(cbrt);
1549       break;
1550     case kIeee754Float64Cos:
1551       ASSEMBLE_IEEE754_UNOP(cos);
1552       break;
1553     case kIeee754Float64Cosh:
1554       ASSEMBLE_IEEE754_UNOP(cosh);
1555       break;
1556     case kIeee754Float64Exp:
1557       ASSEMBLE_IEEE754_UNOP(exp);
1558       break;
1559     case kIeee754Float64Expm1:
1560       ASSEMBLE_IEEE754_UNOP(expm1);
1561       break;
1562     case kIeee754Float64Log:
1563       ASSEMBLE_IEEE754_UNOP(log);
1564       break;
1565     case kIeee754Float64Log1p:
1566       ASSEMBLE_IEEE754_UNOP(log1p);
1567       break;
1568     case kIeee754Float64Log2:
1569       ASSEMBLE_IEEE754_UNOP(log2);
1570       break;
1571     case kIeee754Float64Log10:
1572       ASSEMBLE_IEEE754_UNOP(log10);
1573       break;
1574     case kIeee754Float64Pow:
1575       ASSEMBLE_IEEE754_BINOP(pow);
1576       break;
1577     case kIeee754Float64Sin:
1578       ASSEMBLE_IEEE754_UNOP(sin);
1579       break;
1580     case kIeee754Float64Sinh:
1581       ASSEMBLE_IEEE754_UNOP(sinh);
1582       break;
1583     case kIeee754Float64Tan:
1584       ASSEMBLE_IEEE754_UNOP(tan);
1585       break;
1586     case kIeee754Float64Tanh:
1587       ASSEMBLE_IEEE754_UNOP(tanh);
1588       break;
1589     case kX64Add32:
1590       ASSEMBLE_BINOP(addl);
1591       break;
1592     case kX64Add:
1593       ASSEMBLE_BINOP(addq);
1594       break;
1595     case kX64Sub32:
1596       ASSEMBLE_BINOP(subl);
1597       break;
1598     case kX64Sub:
1599       ASSEMBLE_BINOP(subq);
1600       break;
1601     case kX64And32:
1602       ASSEMBLE_BINOP(andl);
1603       break;
1604     case kX64And:
1605       ASSEMBLE_BINOP(andq);
1606       break;
1607     case kX64Cmp8:
1608       ASSEMBLE_COMPARE(cmpb);
1609       break;
1610     case kX64Cmp16:
1611       ASSEMBLE_COMPARE(cmpw);
1612       break;
1613     case kX64Cmp32:
1614       ASSEMBLE_COMPARE(cmpl);
1615       break;
1616     case kX64Cmp:
1617       ASSEMBLE_COMPARE(cmpq);
1618       break;
1619     case kX64Test8:
1620       ASSEMBLE_COMPARE(testb);
1621       break;
1622     case kX64Test16:
1623       ASSEMBLE_COMPARE(testw);
1624       break;
1625     case kX64Test32:
1626       ASSEMBLE_COMPARE(testl);
1627       break;
1628     case kX64Test:
1629       ASSEMBLE_COMPARE(testq);
1630       break;
1631     case kX64Imul32:
1632       ASSEMBLE_MULT(imull);
1633       break;
1634     case kX64Imul:
1635       ASSEMBLE_MULT(imulq);
1636       break;
1637     case kX64ImulHigh32:
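           // One-operand imull multiplies the operand by the implicit eax and
           // leaves the high 32 bits of the product in edx.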
1638       if (HasRegisterInput(instr, 1)) {
1639         __ imull(i.InputRegister(1));
1640       } else {
1641         __ imull(i.InputOperand(1));
1642       }
1643       break;
1644     case kX64UmulHigh32:
1645       if (HasRegisterInput(instr, 1)) {
1646         __ mull(i.InputRegister(1));
1647       } else {
1648         __ mull(i.InputOperand(1));
1649       }
1650       break;
1651     case kX64Idiv32:
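           // cdq sign-extends eax into edx, forming the edx:eax dividend that
           // idivl expects.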
1652       __ cdq();
1653       __ idivl(i.InputRegister(1));
1654       break;
1655     case kX64Idiv:
1656       __ cqo();
1657       __ idivq(i.InputRegister(1));
1658       break;
1659     case kX64Udiv32:
1660       __ xorl(rdx, rdx);
1661       __ divl(i.InputRegister(1));
1662       break;
1663     case kX64Udiv:
1664       __ xorq(rdx, rdx);
1665       __ divq(i.InputRegister(1));
1666       break;
1667     case kX64Not:
1668       ASSEMBLE_UNOP(notq);
1669       break;
1670     case kX64Not32:
1671       ASSEMBLE_UNOP(notl);
1672       break;
1673     case kX64Neg:
1674       ASSEMBLE_UNOP(negq);
1675       break;
1676     case kX64Neg32:
1677       ASSEMBLE_UNOP(negl);
1678       break;
1679     case kX64Or32:
1680       ASSEMBLE_BINOP(orl);
1681       break;
1682     case kX64Or:
1683       ASSEMBLE_BINOP(orq);
1684       break;
1685     case kX64Xor32:
1686       ASSEMBLE_BINOP(xorl);
1687       break;
1688     case kX64Xor:
1689       ASSEMBLE_BINOP(xorq);
1690       break;
1691     case kX64Shl32:
1692       ASSEMBLE_SHIFT(shll, 5);
1693       break;
1694     case kX64Shl:
1695       ASSEMBLE_SHIFT(shlq, 6);
1696       break;
1697     case kX64Shr32:
1698       ASSEMBLE_SHIFT(shrl, 5);
1699       break;
1700     case kX64Shr:
1701       ASSEMBLE_SHIFT(shrq, 6);
1702       break;
1703     case kX64Sar32:
1704       ASSEMBLE_SHIFT(sarl, 5);
1705       break;
1706     case kX64Sar:
1707       ASSEMBLE_SHIFT(sarq, 6);
1708       break;
1709     case kX64Rol32:
1710       ASSEMBLE_SHIFT(roll, 5);
1711       break;
1712     case kX64Rol:
1713       ASSEMBLE_SHIFT(rolq, 6);
1714       break;
1715     case kX64Ror32:
1716       ASSEMBLE_SHIFT(rorl, 5);
1717       break;
1718     case kX64Ror:
1719       ASSEMBLE_SHIFT(rorq, 6);
1720       break;
1721     case kX64Lzcnt:
1722       if (HasRegisterInput(instr, 0)) {
1723         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1724       } else {
1725         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1726       }
1727       break;
1728     case kX64Lzcnt32:
1729       if (HasRegisterInput(instr, 0)) {
1730         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1731       } else {
1732         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1733       }
1734       break;
1735     case kX64Tzcnt:
1736       if (HasRegisterInput(instr, 0)) {
1737         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1738       } else {
1739         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1740       }
1741       break;
1742     case kX64Tzcnt32:
1743       if (HasRegisterInput(instr, 0)) {
1744         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1745       } else {
1746         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1747       }
1748       break;
1749     case kX64Popcnt:
1750       if (HasRegisterInput(instr, 0)) {
1751         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1752       } else {
1753         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1754       }
1755       break;
1756     case kX64Popcnt32:
1757       if (HasRegisterInput(instr, 0)) {
1758         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1759       } else {
1760         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1761       }
1762       break;
1763     case kX64Bswap:
1764       __ bswapq(i.OutputRegister());
1765       break;
1766     case kX64Bswap32:
1767       __ bswapl(i.OutputRegister());
1768       break;
1769     case kSSEFloat32Cmp:
1770       ASSEMBLE_SSE_BINOP(Ucomiss);
1771       break;
1772     case kSSEFloat32Add:
1773       ASSEMBLE_SSE_BINOP(addss);
1774       break;
1775     case kSSEFloat32Sub:
1776       ASSEMBLE_SSE_BINOP(subss);
1777       break;
1778     case kSSEFloat32Mul:
1779       ASSEMBLE_SSE_BINOP(mulss);
1780       break;
1781     case kSSEFloat32Div:
1782       ASSEMBLE_SSE_BINOP(divss);
1783       // Don't delete this mov. It may improve performance on some CPUs,
1784       // when there is a (v)mulss depending on the result.
1785       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1786       break;
1787     case kSSEFloat32Sqrt:
1788       ASSEMBLE_SSE_UNOP(sqrtss);
1789       break;
1790     case kSSEFloat32ToFloat64:
1791       ASSEMBLE_SSE_UNOP(Cvtss2sd);
1792       break;
1793     case kSSEFloat32Round: {
1794       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1795       RoundingMode const mode =
1796           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1797       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1798       break;
1799     }
1800     case kSSEFloat32ToInt32:
1801       if (instr->InputAt(0)->IsFPRegister()) {
1802         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1803       } else {
1804         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1805       }
1806       break;
1807     case kSSEFloat32ToUint32: {
1808       if (instr->InputAt(0)->IsFPRegister()) {
1809         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1810       } else {
1811         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1812       }
1813       break;
1814     }
1815     case kSSEFloat64Cmp:
1816       ASSEMBLE_SSE_BINOP(Ucomisd);
1817       break;
1818     case kSSEFloat64Add:
1819       ASSEMBLE_SSE_BINOP(addsd);
1820       break;
1821     case kSSEFloat64Sub:
1822       ASSEMBLE_SSE_BINOP(subsd);
1823       break;
1824     case kSSEFloat64Mul:
1825       ASSEMBLE_SSE_BINOP(mulsd);
1826       break;
1827     case kSSEFloat64Div:
1828       ASSEMBLE_SSE_BINOP(divsd);
1829       // Don't delete this mov. It may improve performance on some CPUs,
1830       // when there is a (v)mulsd depending on the result.
1831       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1832       break;
1833     case kSSEFloat64Mod: {
1834       __ AllocateStackSpace(kDoubleSize);
1835       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1836                                                        kDoubleSize);
1837       // Move values to st(0) and st(1).
1838       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1839       __ fld_d(Operand(rsp, 0));
1840       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1841       __ fld_d(Operand(rsp, 0));
1842       // Loop while fprem isn't done.
1843       Label mod_loop;
1844       __ bind(&mod_loop);
1845       // This instruction traps on all kinds of inputs, but we are assuming the
1846       // floating point control word is set to ignore them all.
1847       __ fprem();
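           // fprem only performs a partial reduction and keeps the FPU C2 flag
           // set until the remainder is final. fnstsw/sahf (or the
           // shrl/pushq/popfq fallback) copy C2 into the parity flag, so
           // parity_even loops again.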
1848       // The following 2 instructions implicitly use rax.
1849       __ fnstsw_ax();
1850       if (CpuFeatures::IsSupported(SAHF)) {
1851         CpuFeatureScope sahf_scope(tasm(), SAHF);
1852         __ sahf();
1853       } else {
1854         __ shrl(rax, Immediate(8));
1855         __ andl(rax, Immediate(0xFF));
1856         __ pushq(rax);
1857         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1858                                                          kSystemPointerSize);
1859         __ popfq();
1860         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1861                                                          -kSystemPointerSize);
1862       }
1863       __ j(parity_even, &mod_loop);
1864       // Move output to stack and clean up.
1865       __ fstp(1);
1866       __ fstp_d(Operand(rsp, 0));
1867       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1868       __ addq(rsp, Immediate(kDoubleSize));
1869       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1870                                                        -kDoubleSize);
1871       break;
1872     }
1873     case kSSEFloat32Max: {
1874       Label compare_swap, done_compare;
1875       if (instr->InputAt(1)->IsFPRegister()) {
1876         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1877       } else {
1878         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1879       }
1880       auto ool =
1881           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1882       __ j(parity_even, ool->entry());
1883       __ j(above, &done_compare, Label::kNear);
1884       __ j(below, &compare_swap, Label::kNear);
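           // Both operands compare equal, so only -0.0 vs +0.0 still matters:
           // if the left operand has its sign bit set (it is -0.0), take the
           // right one so that max(-0.0, +0.0) is +0.0.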
1885       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1886       __ testl(kScratchRegister, Immediate(1));
1887       __ j(zero, &done_compare, Label::kNear);
1888       __ bind(&compare_swap);
1889       if (instr->InputAt(1)->IsFPRegister()) {
1890         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1891       } else {
1892         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1893       }
1894       __ bind(&done_compare);
1895       __ bind(ool->exit());
1896       break;
1897     }
1898     case kSSEFloat32Min: {
1899       Label compare_swap, done_compare;
1900       if (instr->InputAt(1)->IsFPRegister()) {
1901         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1902       } else {
1903         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1904       }
1905       auto ool =
1906           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1907       __ j(parity_even, ool->entry());
1908       __ j(below, &done_compare, Label::kNear);
1909       __ j(above, &compare_swap, Label::kNear);
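           // Both operands compare equal, so only -0.0 vs +0.0 still matters:
           // if the right operand has its sign bit set (it is -0.0), take it
           // so that min(+0.0, -0.0) is -0.0.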
1910       if (instr->InputAt(1)->IsFPRegister()) {
1911         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1912       } else {
1913         __ Movss(kScratchDoubleReg, i.InputOperand(1));
1914         __ Movmskps(kScratchRegister, kScratchDoubleReg);
1915       }
1916       __ testl(kScratchRegister, Immediate(1));
1917       __ j(zero, &done_compare, Label::kNear);
1918       __ bind(&compare_swap);
1919       if (instr->InputAt(1)->IsFPRegister()) {
1920         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1921       } else {
1922         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1923       }
1924       __ bind(&done_compare);
1925       __ bind(ool->exit());
1926       break;
1927     }
1928     case kSSEFloat64Max: {
1929       Label compare_swap, done_compare;
1930       if (instr->InputAt(1)->IsFPRegister()) {
1931         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1932       } else {
1933         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1934       }
1935       auto ool =
1936           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1937       __ j(parity_even, ool->entry());
1938       __ j(above, &done_compare, Label::kNear);
1939       __ j(below, &compare_swap, Label::kNear);
1940       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1941       __ testl(kScratchRegister, Immediate(1));
1942       __ j(zero, &done_compare, Label::kNear);
1943       __ bind(&compare_swap);
1944       if (instr->InputAt(1)->IsFPRegister()) {
1945         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1946       } else {
1947         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1948       }
1949       __ bind(&done_compare);
1950       __ bind(ool->exit());
1951       break;
1952     }
1953     case kSSEFloat64Min: {
1954       Label compare_swap, done_compare;
1955       if (instr->InputAt(1)->IsFPRegister()) {
1956         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1957       } else {
1958         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1959       }
1960       auto ool =
1961           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1962       __ j(parity_even, ool->entry());
1963       __ j(below, &done_compare, Label::kNear);
1964       __ j(above, &compare_swap, Label::kNear);
1965       if (instr->InputAt(1)->IsFPRegister()) {
1966         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1967       } else {
1968         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1969         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1970       }
1971       __ testl(kScratchRegister, Immediate(1));
1972       __ j(zero, &done_compare, Label::kNear);
1973       __ bind(&compare_swap);
1974       if (instr->InputAt(1)->IsFPRegister()) {
1975         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1976       } else {
1977         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1978       }
1979       __ bind(&done_compare);
1980       __ bind(ool->exit());
1981       break;
1982     }
1983     case kSSEFloat64Sqrt:
1984       ASSEMBLE_SSE_UNOP(Sqrtsd);
1985       break;
1986     case kSSEFloat64Round: {
1987       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1988       RoundingMode const mode =
1989           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1990       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1991       break;
1992     }
1993     case kSSEFloat64ToFloat32:
1994       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1995       break;
1996     case kSSEFloat64ToInt32:
1997       if (instr->InputAt(0)->IsFPRegister()) {
1998         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1999       } else {
2000         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
2001       }
2002       break;
2003     case kSSEFloat64ToUint32: {
2004       if (instr->InputAt(0)->IsFPRegister()) {
2005         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2006       } else {
2007         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
2008       }
2009       if (MiscField::decode(instr->opcode())) {
2010         __ AssertZeroExtended(i.OutputRegister());
2011       }
2012       break;
2013     }
2014     case kSSEFloat32ToInt64: {
2015       Register output_reg = i.OutputRegister(0);
2016       if (instr->OutputCount() == 1) {
2017         if (instr->InputAt(0)->IsFPRegister()) {
2018           __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2019         } else {
2020           __ Cvttss2siq(output_reg, i.InputOperand(0));
2021         }
2022         break;
2023       }
2024       DCHECK_EQ(2, instr->OutputCount());
2025       Register success_reg = i.OutputRegister(1);
2026       if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2027         DoubleRegister rounded = kScratchDoubleReg;
2028         if (instr->InputAt(0)->IsFPRegister()) {
2029           __ Roundss(rounded, i.InputDoubleRegister(0), kRoundToZero);
2030           __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2031         } else {
2032           __ Roundss(rounded, i.InputOperand(0), kRoundToZero);
2033           // Convert {rounded} instead of the input operand, to avoid another
2034           // load.
2035           __ Cvttss2siq(output_reg, rounded);
2036         }
2037         DoubleRegister converted_back = i.TempSimd128Register(0);
2038         __ Cvtqsi2ss(converted_back, output_reg);
2039         // Compare the converted-back value to the rounded value; set
2040         // success_reg to 0 if they differ, or to 1 on success.
2041         __ Cmpeqss(converted_back, rounded);
2042         __ Movq(success_reg, converted_back);
2043         __ And(success_reg, Immediate(1));
2044       } else {
2045         // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2046         if (instr->InputAt(0)->IsFPRegister()) {
2047           __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2048         } else {
2049           __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
2050         }
2051         __ Move(success_reg, 1);
2052         Label done;
2053         Label fail;
2054         __ Move(kScratchDoubleReg, float{INT64_MIN});
2055         if (instr->InputAt(0)->IsFPRegister()) {
2056           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
2057         } else {
2058           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
2059         }
2060         // If the input is NaN, then the conversion fails.
2061         __ j(parity_even, &fail, Label::kNear);
2062         // If the input is INT64_MIN, then the conversion succeeds.
2063         __ j(equal, &done, Label::kNear);
2064         __ cmpq(output_reg, Immediate(1));
2065         // If the conversion results in INT64_MIN, but the input was not
2066         // INT64_MIN, then the conversion fails.
2067         __ j(no_overflow, &done, Label::kNear);
2068         __ bind(&fail);
2069         __ Move(success_reg, 0);
2070         __ bind(&done);
2071       }
2072       break;
2073     }
2074     case kSSEFloat64ToInt64: {
2075       Register output_reg = i.OutputRegister(0);
2076       if (instr->OutputCount() == 1) {
2077         if (instr->InputAt(0)->IsFPRegister()) {
2078           __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2079         } else {
2080           __ Cvttsd2siq(output_reg, i.InputOperand(0));
2081         }
2082         break;
2083       }
2084       DCHECK_EQ(2, instr->OutputCount());
2085       Register success_reg = i.OutputRegister(1);
2086       if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2087         DoubleRegister rounded = kScratchDoubleReg;
2088         if (instr->InputAt(0)->IsFPRegister()) {
2089           __ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
2090           __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2091         } else {
2092           __ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
2093           // Convert {rounded} instead of the input operand, to avoid another
2094           // load.
2095           __ Cvttsd2siq(output_reg, rounded);
2096         }
2097         DoubleRegister converted_back = i.TempSimd128Register(0);
2098         __ Cvtqsi2sd(converted_back, output_reg);
2099         // Compare the converted-back value to the rounded value; set
2100         // success_reg to 0 if they differ, or to 1 on success.
2101         __ Cmpeqsd(converted_back, rounded);
2102         __ Movq(success_reg, converted_back);
2103         __ And(success_reg, Immediate(1));
2104       } else {
2105         // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2106         if (instr->InputAt(0)->IsFPRegister()) {
2107           __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
2108         } else {
2109           __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
2110         }
2111         __ Move(success_reg, 1);
2112         Label done;
2113         Label fail;
2114         __ Move(kScratchDoubleReg, double{INT64_MIN});
2115         if (instr->InputAt(0)->IsFPRegister()) {
2116           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2117         } else {
2118           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2119         }
2120         // If the input is NaN, then the conversion fails.
2121         __ j(parity_even, &fail, Label::kNear);
2122         // If the input is INT64_MIN, then the conversion succeeds.
2123         __ j(equal, &done, Label::kNear);
2124         __ cmpq(output_reg, Immediate(1));
2125         // If the conversion results in INT64_MIN, but the input was not
2126         // INT64_MIN, then the conversion fails.
2127         __ j(no_overflow, &done, Label::kNear);
2128         __ bind(&fail);
2129         __ Move(success_reg, 0);
2130         __ bind(&done);
2131       }
2132       break;
2133     }
2134     case kSSEFloat32ToUint64: {
2135       Label fail;
2136       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2137       if (instr->InputAt(0)->IsFPRegister()) {
2138         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2139       } else {
2140         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2141       }
2142       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2143       __ bind(&fail);
2144       break;
2145     }
2146     case kSSEFloat64ToUint64: {
2147       Label fail;
2148       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2149       if (instr->InputAt(0)->IsFPRegister()) {
2150         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2151       } else {
2152         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2153       }
2154       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2155       __ bind(&fail);
2156       break;
2157     }
2158     case kSSEInt32ToFloat64:
2159       if (HasRegisterInput(instr, 0)) {
2160         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2161       } else {
2162         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2163       }
2164       break;
2165     case kSSEInt32ToFloat32:
2166       if (HasRegisterInput(instr, 0)) {
2167         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2168       } else {
2169         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2170       }
2171       break;
2172     case kSSEInt64ToFloat32:
2173       if (HasRegisterInput(instr, 0)) {
2174         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2175       } else {
2176         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2177       }
2178       break;
2179     case kSSEInt64ToFloat64:
2180       if (HasRegisterInput(instr, 0)) {
2181         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2182       } else {
2183         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2184       }
2185       break;
2186     case kSSEUint64ToFloat32:
2187       if (HasRegisterInput(instr, 0)) {
2188         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2189       } else {
2190         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2191       }
2192       break;
2193     case kSSEUint64ToFloat64:
2194       if (HasRegisterInput(instr, 0)) {
2195         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2196       } else {
2197         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2198       }
2199       break;
2200     case kSSEUint32ToFloat64:
2201       if (HasRegisterInput(instr, 0)) {
2202         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2203       } else {
2204         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2205       }
2206       break;
2207     case kSSEUint32ToFloat32:
2208       if (HasRegisterInput(instr, 0)) {
2209         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2210       } else {
2211         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2212       }
2213       break;
2214     case kSSEFloat64ExtractLowWord32:
2215       if (instr->InputAt(0)->IsFPStackSlot()) {
2216         __ movl(i.OutputRegister(), i.InputOperand(0));
2217       } else {
2218         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2219       }
2220       break;
2221     case kSSEFloat64ExtractHighWord32:
2222       if (instr->InputAt(0)->IsFPStackSlot()) {
2223         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
2224       } else {
2225         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
2226       }
2227       break;
2228     case kSSEFloat64InsertLowWord32:
2229       if (HasRegisterInput(instr, 1)) {
2230         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
2231       } else {
2232         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
2233       }
2234       break;
2235     case kSSEFloat64InsertHighWord32:
2236       if (HasRegisterInput(instr, 1)) {
2237         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
2238       } else {
2239         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
2240       }
2241       break;
2242     case kSSEFloat64LoadLowWord32:
2243       if (HasRegisterInput(instr, 0)) {
2244         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2245       } else {
2246         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
2247       }
2248       break;
2249     case kAVXFloat32Cmp: {
2250       CpuFeatureScope avx_scope(tasm(), AVX);
2251       if (instr->InputAt(1)->IsFPRegister()) {
2252         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2253       } else {
2254         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2255       }
2256       break;
2257     }
2258     case kAVXFloat32Add:
2259       ASSEMBLE_AVX_BINOP(vaddss);
2260       break;
2261     case kAVXFloat32Sub:
2262       ASSEMBLE_AVX_BINOP(vsubss);
2263       break;
2264     case kAVXFloat32Mul:
2265       ASSEMBLE_AVX_BINOP(vmulss);
2266       break;
2267     case kAVXFloat32Div:
2268       ASSEMBLE_AVX_BINOP(vdivss);
2269       // Don't delete this mov. It may improve performance on some CPUs,
2270       // when there is a (v)mulss depending on the result.
2271       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2272       break;
2273     case kAVXFloat64Cmp: {
2274       CpuFeatureScope avx_scope(tasm(), AVX);
2275       if (instr->InputAt(1)->IsFPRegister()) {
2276         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2277       } else {
2278         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2279       }
2280       break;
2281     }
2282     case kAVXFloat64Add:
2283       ASSEMBLE_AVX_BINOP(vaddsd);
2284       break;
2285     case kAVXFloat64Sub:
2286       ASSEMBLE_AVX_BINOP(vsubsd);
2287       break;
2288     case kAVXFloat64Mul:
2289       ASSEMBLE_AVX_BINOP(vmulsd);
2290       break;
2291     case kAVXFloat64Div:
2292       ASSEMBLE_AVX_BINOP(vdivsd);
2293       // Don't delete this mov. It may improve performance on some CPUs,
2294       // when there is a (v)mulsd depending on the result.
2295       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2296       break;
2297     case kX64Float32Abs: {
2298       __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2299                kScratchRegister);
2300       break;
2301     }
2302     case kX64Float32Neg: {
2303       __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2304                kScratchRegister);
2305       break;
2306     }
2307     case kX64F64x2Abs:
2308     case kX64Float64Abs: {
2309       __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2310                kScratchRegister);
2311       break;
2312     }
2313     case kX64F64x2Neg:
2314     case kX64Float64Neg: {
2315       __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2316                kScratchRegister);
2317       break;
2318     }
2319     case kSSEFloat64SilenceNaN:
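           // Subtracting 0.0 leaves ordinary values (including -0.0) unchanged
           // but forces a signaling NaN through the FPU, which quiets it.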
2320       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
2321       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
2322       break;
2323     case kX64Movsxbl:
2324       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2325       ASSEMBLE_MOVX(movsxbl);
2326       __ AssertZeroExtended(i.OutputRegister());
2327       break;
2328     case kX64Movzxbl:
2329       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2330       ASSEMBLE_MOVX(movzxbl);
2331       __ AssertZeroExtended(i.OutputRegister());
2332       break;
2333     case kX64Movsxbq:
2334       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2335       ASSEMBLE_MOVX(movsxbq);
2336       break;
2337     case kX64Movzxbq:
2338       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2339       ASSEMBLE_MOVX(movzxbq);
2340       __ AssertZeroExtended(i.OutputRegister());
2341       break;
2342     case kX64Movb: {
2343       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2344       size_t index = 0;
2345       Operand operand = i.MemoryOperand(&index);
2346       if (HasImmediateInput(instr, index)) {
2347         Immediate value(Immediate(i.InputInt8(index)));
2348         EmitTSANAwareStore<std::memory_order_relaxed>(
2349             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2350             MachineRepresentation::kWord8);
2351       } else {
2352         Register value(i.InputRegister(index));
2353         EmitTSANAwareStore<std::memory_order_relaxed>(
2354             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2355             MachineRepresentation::kWord8);
2356       }
2357       break;
2358     }
2359     case kX64Movsxwl:
2360       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2361       ASSEMBLE_MOVX(movsxwl);
2362       __ AssertZeroExtended(i.OutputRegister());
2363       break;
2364     case kX64Movzxwl:
2365       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2366       ASSEMBLE_MOVX(movzxwl);
2367       __ AssertZeroExtended(i.OutputRegister());
2368       break;
2369     case kX64Movsxwq:
2370       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2371       ASSEMBLE_MOVX(movsxwq);
2372       break;
2373     case kX64Movzxwq:
2374       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2375       ASSEMBLE_MOVX(movzxwq);
2376       __ AssertZeroExtended(i.OutputRegister());
2377       break;
2378     case kX64Movw: {
2379       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2380       size_t index = 0;
2381       Operand operand = i.MemoryOperand(&index);
2382       if (HasImmediateInput(instr, index)) {
2383         Immediate value(Immediate(i.InputInt16(index)));
2384         EmitTSANAwareStore<std::memory_order_relaxed>(
2385             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2386             MachineRepresentation::kWord16);
2387       } else {
2388         Register value(i.InputRegister(index));
2389         EmitTSANAwareStore<std::memory_order_relaxed>(
2390             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2391             MachineRepresentation::kWord16);
2392       }
2393       break;
2394     }
2395     case kX64Movl:
2396       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2397       if (instr->HasOutput()) {
2398         if (HasAddressingMode(instr)) {
2399           Operand address(i.MemoryOperand());
2400           __ movl(i.OutputRegister(), address);
2401           EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2402                                          DetermineStubCallMode(), kInt32Size);
2403         } else {
2404           if (HasRegisterInput(instr, 0)) {
2405             __ movl(i.OutputRegister(), i.InputRegister(0));
2406           } else {
2407             __ movl(i.OutputRegister(), i.InputOperand(0));
2408           }
2409         }
2410         __ AssertZeroExtended(i.OutputRegister());
2411       } else {
2412         size_t index = 0;
2413         Operand operand = i.MemoryOperand(&index);
2414         if (HasImmediateInput(instr, index)) {
2415           Immediate value(i.InputImmediate(index));
2416           EmitTSANAwareStore<std::memory_order_relaxed>(
2417               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2418               MachineRepresentation::kWord32);
2419         } else {
2420           Register value(i.InputRegister(index));
2421           EmitTSANAwareStore<std::memory_order_relaxed>(
2422               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2423               MachineRepresentation::kWord32);
2424         }
2425       }
2426       break;
2427     case kX64Movsxlq:
2428       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2429       ASSEMBLE_MOVX(movsxlq);
2430       break;
2431     case kX64MovqDecompressTaggedSigned: {
2432       CHECK(instr->HasOutput());
2433       Operand address(i.MemoryOperand());
2434       __ DecompressTaggedSigned(i.OutputRegister(), address);
2435       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2436                                      DetermineStubCallMode(), kTaggedSize);
2437       break;
2438     }
2439     case kX64MovqDecompressTaggedPointer: {
2440       CHECK(instr->HasOutput());
2441       Operand address(i.MemoryOperand());
2442       __ DecompressTaggedPointer(i.OutputRegister(), address);
2443       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2444                                      DetermineStubCallMode(), kTaggedSize);
2445       break;
2446     }
2447     case kX64MovqDecompressAnyTagged: {
2448       CHECK(instr->HasOutput());
2449       Operand address(i.MemoryOperand());
2450       __ DecompressAnyTagged(i.OutputRegister(), address);
2451       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2452                                      DetermineStubCallMode(), kTaggedSize);
2453       break;
2454     }
2455     case kX64MovqCompressTagged: {
2456       CHECK(!instr->HasOutput());
2457       size_t index = 0;
2458       Operand operand = i.MemoryOperand(&index);
2459       if (HasImmediateInput(instr, index)) {
2460         Immediate value(i.InputImmediate(index));
2461         EmitTSANAwareStore<std::memory_order_relaxed>(
2462             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2463             MachineRepresentation::kTagged);
2464       } else {
2465         Register value(i.InputRegister(index));
2466         EmitTSANAwareStore<std::memory_order_relaxed>(
2467             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2468             MachineRepresentation::kTagged);
2469       }
2470       break;
2471     }
2472     case kX64MovqDecodeSandboxedPointer: {
2473       CHECK(instr->HasOutput());
2474       Operand address(i.MemoryOperand());
2475       Register dst = i.OutputRegister();
2476       __ movq(dst, address);
2477       __ DecodeSandboxedPointer(dst);
2478       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2479                                      DetermineStubCallMode(),
2480                                      kSystemPointerSize);
2481       break;
2482     }
2483     case kX64MovqEncodeSandboxedPointer: {
2484       CHECK(!instr->HasOutput());
2485       size_t index = 0;
2486       Operand operand = i.MemoryOperand(&index);
2487       CHECK(!HasImmediateInput(instr, index));
2488       Register value(i.InputRegister(index));
2489       EmitTSANAwareStore<std::memory_order_relaxed>(
2490           zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2491           MachineRepresentation::kSandboxedPointer);
2492       break;
2493     }
2494     case kX64Movq:
2495       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2496       if (instr->HasOutput()) {
2497         Operand address(i.MemoryOperand());
2498         __ movq(i.OutputRegister(), address);
2499         EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2500                                        DetermineStubCallMode(), kInt64Size);
2501       } else {
2502         size_t index = 0;
2503         Operand operand = i.MemoryOperand(&index);
2504         if (HasImmediateInput(instr, index)) {
2505           Immediate value(i.InputImmediate(index));
2506           EmitTSANAwareStore<std::memory_order_relaxed>(
2507               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2508               MachineRepresentation::kWord64);
2509         } else {
2510           Register value(i.InputRegister(index));
2511           EmitTSANAwareStore<std::memory_order_relaxed>(
2512               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2513               MachineRepresentation::kWord64);
2514         }
2515       }
2516       break;
2517     case kX64Movss:
2518       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2519       if (instr->HasOutput()) {
2520         __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2521       } else {
2522         size_t index = 0;
2523         Operand operand = i.MemoryOperand(&index);
2524         __ Movss(operand, i.InputDoubleRegister(index));
2525       }
2526       break;
2527     case kX64Movsd: {
2528       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2529       if (instr->HasOutput()) {
2530         __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2531       } else {
2532         size_t index = 0;
2533         Operand operand = i.MemoryOperand(&index);
2534         __ Movsd(operand, i.InputDoubleRegister(index));
2535       }
2536       break;
2537     }
2538     case kX64Movdqu: {
2539       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2540       if (instr->HasOutput()) {
2541         __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2542       } else {
2543         size_t index = 0;
2544         Operand operand = i.MemoryOperand(&index);
2545         __ Movdqu(operand, i.InputSimd128Register(index));
2546       }
2547       break;
2548     }
2549     case kX64BitcastFI:
2550       if (instr->InputAt(0)->IsFPStackSlot()) {
2551         __ movl(i.OutputRegister(), i.InputOperand(0));
2552       } else {
2553         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2554       }
2555       break;
2556     case kX64BitcastDL:
2557       if (instr->InputAt(0)->IsFPStackSlot()) {
2558         __ movq(i.OutputRegister(), i.InputOperand(0));
2559       } else {
2560         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2561       }
2562       break;
2563     case kX64BitcastIF:
2564       if (HasRegisterInput(instr, 0)) {
2565         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2566       } else {
2567         __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2568       }
2569       break;
2570     case kX64BitcastLD:
2571       if (HasRegisterInput(instr, 0)) {
2572         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2573       } else {
2574         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2575       }
2576       break;
2577     case kX64Lea32: {
2578       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2579       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2580       // and addressing mode just happen to work out. The "addl"/"subl" forms
2581       // in these cases are faster based on measurements.
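      // For example (assuming output == input 0): with kMode_MRI and a summand
      // of 4, "leal rax,[rax+4]" becomes "addl rax,4"; with kMode_M4,
      // "leal rax,[rax*4]" becomes "shll rax,2"; with kMode_MR1 and
      // input 1 == output as well, "leal rax,[rax+rax]" becomes "shll rax,1".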
2582       if (i.InputRegister(0) == i.OutputRegister()) {
2583         if (mode == kMode_MRI) {
2584           int32_t constant_summand = i.InputInt32(1);
2585           DCHECK_NE(0, constant_summand);
2586           if (constant_summand > 0) {
2587             __ addl(i.OutputRegister(), Immediate(constant_summand));
2588           } else {
2589             __ subl(i.OutputRegister(),
2590                     Immediate(base::NegateWithWraparound(constant_summand)));
2591           }
2592         } else if (mode == kMode_MR1) {
2593           if (i.InputRegister(1) == i.OutputRegister()) {
2594             __ shll(i.OutputRegister(), Immediate(1));
2595           } else {
2596             __ addl(i.OutputRegister(), i.InputRegister(1));
2597           }
2598         } else if (mode == kMode_M2) {
2599           __ shll(i.OutputRegister(), Immediate(1));
2600         } else if (mode == kMode_M4) {
2601           __ shll(i.OutputRegister(), Immediate(2));
2602         } else if (mode == kMode_M8) {
2603           __ shll(i.OutputRegister(), Immediate(3));
2604         } else {
2605           __ leal(i.OutputRegister(), i.MemoryOperand());
2606         }
2607       } else if (mode == kMode_MR1 &&
2608                  i.InputRegister(1) == i.OutputRegister()) {
2609         __ addl(i.OutputRegister(), i.InputRegister(0));
2610       } else {
2611         __ leal(i.OutputRegister(), i.MemoryOperand());
2612       }
2613       __ AssertZeroExtended(i.OutputRegister());
2614       break;
2615     }
2616     case kX64Lea: {
2617       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2618       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2619       // and addressing mode just happen to work out. The "addq"/"subq" forms
2620       // in these cases are faster based on measurements.
2621       if (i.InputRegister(0) == i.OutputRegister()) {
2622         if (mode == kMode_MRI) {
2623           int32_t constant_summand = i.InputInt32(1);
2624           if (constant_summand > 0) {
2625             __ addq(i.OutputRegister(), Immediate(constant_summand));
2626           } else if (constant_summand < 0) {
2627             __ subq(i.OutputRegister(), Immediate(-constant_summand));
2628           }
2629         } else if (mode == kMode_MR1) {
2630           if (i.InputRegister(1) == i.OutputRegister()) {
2631             __ shlq(i.OutputRegister(), Immediate(1));
2632           } else {
2633             __ addq(i.OutputRegister(), i.InputRegister(1));
2634           }
2635         } else if (mode == kMode_M2) {
2636           __ shlq(i.OutputRegister(), Immediate(1));
2637         } else if (mode == kMode_M4) {
2638           __ shlq(i.OutputRegister(), Immediate(2));
2639         } else if (mode == kMode_M8) {
2640           __ shlq(i.OutputRegister(), Immediate(3));
2641         } else {
2642           __ leaq(i.OutputRegister(), i.MemoryOperand());
2643         }
2644       } else if (mode == kMode_MR1 &&
2645                  i.InputRegister(1) == i.OutputRegister()) {
2646         __ addq(i.OutputRegister(), i.InputRegister(0));
2647       } else {
2648         __ leaq(i.OutputRegister(), i.MemoryOperand());
2649       }
2650       break;
2651     }
2652     case kX64Dec32:
2653       __ decl(i.OutputRegister());
2654       break;
2655     case kX64Inc32:
2656       __ incl(i.OutputRegister());
2657       break;
2658     case kX64Push: {
2659       int stack_decrement = i.InputInt32(0);
2660       int slots = stack_decrement / kSystemPointerSize;
2661       // Whenever codegen uses pushq, we need to check if stack_decrement
2662       // contains any extra padding and adjust the stack before the pushq.
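      // For example, with stack_decrement == 16 and a GP register input,
      // 8 bytes of padding are allocated first and pushq then writes the
      // remaining kSystemPointerSize bytes. FP and SIMD inputs instead
      // allocate the full decrement and store to Operand(rsp, 0).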
2663       if (HasImmediateInput(instr, 1)) {
2664         __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2665         __ pushq(i.InputImmediate(1));
2666       } else if (HasAddressingMode(instr)) {
2667         __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2668         size_t index = 1;
2669         Operand operand = i.MemoryOperand(&index);
2670         __ pushq(operand);
2671       } else {
2672         InstructionOperand* input = instr->InputAt(1);
2673         if (input->IsRegister()) {
2674           __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2675           __ pushq(i.InputRegister(1));
2676         } else if (input->IsFloatRegister() || input->IsDoubleRegister()) {
2677           DCHECK_GE(stack_decrement, kSystemPointerSize);
2678           __ AllocateStackSpace(stack_decrement);
2679           __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
2680         } else if (input->IsSimd128Register()) {
2681           DCHECK_GE(stack_decrement, kSimd128Size);
2682           __ AllocateStackSpace(stack_decrement);
2683           // TODO(bbudge) Use Movaps when slots are aligned.
2684           __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
2685         } else if (input->IsStackSlot() || input->IsFloatStackSlot() ||
2686                    input->IsDoubleStackSlot()) {
2687           __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2688           __ pushq(i.InputOperand(1));
2689         } else {
2690           DCHECK(input->IsSimd128StackSlot());
2691           DCHECK_GE(stack_decrement, kSimd128Size);
2692           // TODO(bbudge) Use Movaps when slots are aligned.
2693           __ Movups(kScratchDoubleReg, i.InputOperand(1));
2694           __ AllocateStackSpace(stack_decrement);
2695           __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2696         }
2697       }
2698       frame_access_state()->IncreaseSPDelta(slots);
2699       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2700                                                        stack_decrement);
2701       break;
2702     }
2703     case kX64Poke: {
2704       int slot = MiscField::decode(instr->opcode());
2705       if (HasImmediateInput(instr, 0)) {
2706         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2707       } else if (instr->InputAt(0)->IsFPRegister()) {
2708         LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2709         if (op->representation() == MachineRepresentation::kFloat64) {
2710           __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2711                    i.InputDoubleRegister(0));
2712         } else {
2713           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2714           __ Movss(Operand(rsp, slot * kSystemPointerSize),
2715                    i.InputFloatRegister(0));
2716         }
2717       } else {
2718         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2719       }
2720       break;
2721     }
2722     case kX64Peek: {
2723       int reverse_slot = i.InputInt32(0);
2724       int offset =
2725           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2726       if (instr->OutputAt(0)->IsFPRegister()) {
2727         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2728         if (op->representation() == MachineRepresentation::kFloat64) {
2729           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2730         } else if (op->representation() == MachineRepresentation::kFloat32) {
2731           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2732         } else {
2733           DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2734           __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2735         }
2736       } else {
2737         __ movq(i.OutputRegister(), Operand(rbp, offset));
2738       }
2739       break;
2740     }
2741     case kX64F64x2Splat: {
2742       XMMRegister dst = i.OutputSimd128Register();
2743       if (instr->InputAt(0)->IsFPRegister()) {
2744         __ Movddup(dst, i.InputDoubleRegister(0));
2745       } else {
2746         __ Movddup(dst, i.InputOperand(0));
2747       }
2748       break;
2749     }
2750     case kX64F64x2ExtractLane: {
2751       __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2752                           i.InputUint8(1));
2753       break;
2754     }
2755     case kX64F64x2ReplaceLane: {
2756       __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2757                           i.InputDoubleRegister(2), i.InputInt8(1));
2758       break;
2759     }
2760     case kX64F64x2Sqrt: {
2761       __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2762       break;
2763     }
2764     case kX64F64x2Add: {
2765       ASSEMBLE_SIMD_BINOP(addpd);
2766       break;
2767     }
2768     case kX64F64x2Sub: {
2769       ASSEMBLE_SIMD_BINOP(subpd);
2770       break;
2771     }
2772     case kX64F64x2Mul: {
2773       ASSEMBLE_SIMD_BINOP(mulpd);
2774       break;
2775     }
2776     case kX64F64x2Div: {
2777       ASSEMBLE_SIMD_BINOP(divpd);
2778       break;
2779     }
2780     case kX64F64x2Min: {
2781       // Avoids a move in the no-AVX case if dst == src0.
2782       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2783       __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2784                   i.InputSimd128Register(1), kScratchDoubleReg);
2785       break;
2786     }
2787     case kX64F64x2Max: {
2788       // Avoids a move in the no-AVX case if dst == src0.
2789       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2790       __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2791                   i.InputSimd128Register(1), kScratchDoubleReg);
2792       break;
2793     }
2794     case kX64F64x2Eq: {
2795       ASSEMBLE_SIMD_BINOP(cmpeqpd);
2796       break;
2797     }
2798     case kX64F64x2Ne: {
2799       ASSEMBLE_SIMD_BINOP(cmpneqpd);
2800       break;
2801     }
2802     case kX64F64x2Lt: {
2803       ASSEMBLE_SIMD_BINOP(cmpltpd);
2804       break;
2805     }
2806     case kX64F64x2Le: {
2807       ASSEMBLE_SIMD_BINOP(cmplepd);
2808       break;
2809     }
2810     case kX64F64x2Qfma: {
2811       __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2812                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2813                    kScratchDoubleReg);
2814       break;
2815     }
2816     case kX64F64x2Qfms: {
2817       __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2818                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2819                    kScratchDoubleReg);
2820       break;
2821     }
2822     case kX64F64x2ConvertLowI32x4S: {
2823       __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2824       break;
2825     }
2826     case kX64F64x2ConvertLowI32x4U: {
2827       __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2828                                i.InputSimd128Register(0), kScratchRegister);
2829       break;
2830     }
2831     case kX64F64x2PromoteLowF32x4: {
2832       if (HasAddressingMode(instr)) {
2833         EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2834         __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
2835       } else {
2836         __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2837       }
2838       break;
2839     }
2840     case kX64F32x4DemoteF64x2Zero: {
2841       __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2842       break;
2843     }
2844     case kX64I32x4TruncSatF64x2SZero: {
2845       __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
2846                                  i.InputSimd128Register(0), kScratchDoubleReg,
2847                                  kScratchRegister);
2848       break;
2849     }
2850     case kX64I32x4TruncSatF64x2UZero: {
2851       __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
2852                                  i.InputSimd128Register(0), kScratchDoubleReg,
2853                                  kScratchRegister);
2854       break;
2855     }
2856     case kX64F32x4Splat: {
2857       __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
2858       break;
2859     }
2860     case kX64F32x4ExtractLane: {
2861       __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2862                           i.InputUint8(1));
2863       break;
2864     }
2865     case kX64F32x4ReplaceLane: {
2866       // The insertps instruction uses imm8[5:4] to indicate the lane
2867       // that needs to be replaced.
2868       byte select = i.InputInt8(1) << 4 & 0x30;
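      // e.g. replacing lane 2 gives select == 0x20: imm8[5:4] == 2 selects the
      // destination lane, while imm8[7:6] (source lane) and imm8[3:0] (zero
      // mask) stay 0.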
2869       if (instr->InputAt(2)->IsFPRegister()) {
2870         __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2871                     select);
2872       } else {
2873         __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2874       }
2875       break;
2876     }
2877     case kX64F32x4SConvertI32x4: {
2878       __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2879       break;
2880     }
2881     case kX64F32x4UConvertI32x4: {
2882       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2883       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2884       XMMRegister dst = i.OutputSimd128Register();
2885       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
2886       __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55});  // get lo 16 bits
2887       __ Psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
2888       __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
2889       __ Psrld(dst, byte{1});    // halve: keep hi in signed range for cvtdq2ps
2890       __ Cvtdq2ps(dst, dst);             // convert hi exactly
2891       __ Addps(dst, dst);                // double hi, exactly
2892       __ Addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
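      // Cvtdq2ps is a signed conversion, so lanes >= 0x80000000 cannot be
      // converted directly. e.g. for a lane 0x90000000: lo == 0,
      // hi == 0x90000000, hi >> 1 == 0x48000000 converts exactly, and doubling
      // plus adding lo reproduces 2415919104.0f.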
2893       break;
2894     }
2895     case kX64F32x4Abs: {
2896       XMMRegister dst = i.OutputSimd128Register();
2897       XMMRegister src = i.InputSimd128Register(0);
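      // Builds the 0x7FFFFFFF mask on the fly (all-ones >> 1) and ANDs it in
      // to clear the sign bit of every lane.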
2898       if (dst == src) {
2899         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2900         __ Psrld(kScratchDoubleReg, byte{1});
2901         __ Andps(dst, kScratchDoubleReg);
2902       } else {
2903         __ Pcmpeqd(dst, dst);
2904         __ Psrld(dst, byte{1});
2905         __ Andps(dst, src);
2906       }
2907       break;
2908     }
2909     case kX64F32x4Neg: {
2910       XMMRegister dst = i.OutputSimd128Register();
2911       XMMRegister src = i.InputSimd128Register(0);
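      // Builds the 0x80000000 mask on the fly (all-ones << 31) and XORs it in
      // to flip the sign bit of every lane.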
2912       if (dst == src) {
2913         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2914         __ Pslld(kScratchDoubleReg, byte{31});
2915         __ Xorps(dst, kScratchDoubleReg);
2916       } else {
2917         __ Pcmpeqd(dst, dst);
2918         __ Pslld(dst, byte{31});
2919         __ Xorps(dst, src);
2920       }
2921       break;
2922     }
2923     case kX64F32x4Sqrt: {
2924       __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2925       break;
2926     }
2927     case kX64F32x4RecipApprox: {
2928       __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2929       break;
2930     }
2931     case kX64F32x4RecipSqrtApprox: {
2932       __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2933       break;
2934     }
2935     case kX64F32x4Add: {
2936       ASSEMBLE_SIMD_BINOP(addps);
2937       break;
2938     }
2939     case kX64F32x4Sub: {
2940       ASSEMBLE_SIMD_BINOP(subps);
2941       break;
2942     }
2943     case kX64F32x4Mul: {
2944       ASSEMBLE_SIMD_BINOP(mulps);
2945       break;
2946     }
2947     case kX64F32x4Div: {
2948       ASSEMBLE_SIMD_BINOP(divps);
2949       break;
2950     }
2951     case kX64F32x4Min: {
2952       __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2953                   i.InputSimd128Register(1), kScratchDoubleReg);
2954       break;
2955     }
2956     case kX64F32x4Max: {
2957       __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2958                   i.InputSimd128Register(1), kScratchDoubleReg);
2959       break;
2960     }
2961     case kX64F32x4Eq: {
2962       ASSEMBLE_SIMD_BINOP(cmpeqps);
2963       break;
2964     }
2965     case kX64F32x4Ne: {
2966       ASSEMBLE_SIMD_BINOP(cmpneqps);
2967       break;
2968     }
2969     case kX64F32x4Lt: {
2970       ASSEMBLE_SIMD_BINOP(cmpltps);
2971       break;
2972     }
2973     case kX64F32x4Le: {
2974       ASSEMBLE_SIMD_BINOP(cmpleps);
2975       break;
2976     }
2977     case kX64F32x4Qfma: {
2978       __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2979                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2980                    kScratchDoubleReg);
2981       break;
2982     }
2983     case kX64F32x4Qfms: {
2984       __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2985                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2986                    kScratchDoubleReg);
2987       break;
2988     }
2989     case kX64Minps: {
2990       ASSEMBLE_SIMD_BINOP(minps);
2991       break;
2992     }
2993     case kX64Maxps: {
2994       ASSEMBLE_SIMD_BINOP(maxps);
2995       break;
2996     }
2997     case kX64F32x4Round: {
2998       RoundingMode const mode =
2999           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
3000       __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
3001       break;
3002     }
3003     case kX64F64x2Round: {
3004       RoundingMode const mode =
3005           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
3006       __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
3007       break;
3008     }
3009     case kX64Minpd: {
3010       ASSEMBLE_SIMD_BINOP(minpd);
3011       break;
3012     }
3013     case kX64Maxpd: {
3014       ASSEMBLE_SIMD_BINOP(maxpd);
3015       break;
3016     }
3017     case kX64I64x2Splat: {
3018       XMMRegister dst = i.OutputSimd128Register();
3019       if (HasRegisterInput(instr, 0)) {
3020         __ Movq(dst, i.InputRegister(0));
3021         __ Movddup(dst, dst);
3022       } else {
3023         __ Movddup(dst, i.InputOperand(0));
3024       }
3025       break;
3026     }
3027     case kX64I64x2ExtractLane: {
3028       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3029       break;
3030     }
3031     case kX64I64x2Abs: {
3032       __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
3033                   kScratchDoubleReg);
3034       break;
3035     }
3036     case kX64I64x2Neg: {
3037       __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
3038                   kScratchDoubleReg);
3039       break;
3040     }
3041     case kX64I64x2BitMask: {
3042       __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
3043       break;
3044     }
3045     case kX64I64x2Shl: {
3046       // Take shift value modulo 2^6.
3047       ASSEMBLE_SIMD_SHIFT(psllq, 6);
3048       break;
3049     }
3050     case kX64I64x2ShrS: {
3051       // TODO(zhin): there is vpsraq, but it requires AVX512.
3052       XMMRegister dst = i.OutputSimd128Register();
3053       XMMRegister src = i.InputSimd128Register(0);
3054       if (HasImmediateInput(instr, 1)) {
3055         __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
3056       } else {
3057         __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
3058                      i.TempSimd128Register(0), kScratchRegister);
3059       }
3060       break;
3061     }
3062     case kX64I64x2Add: {
3063       ASSEMBLE_SIMD_BINOP(paddq);
3064       break;
3065     }
3066     case kX64I64x2Sub: {
3067       ASSEMBLE_SIMD_BINOP(psubq);
3068       break;
3069     }
3070     case kX64I64x2Mul: {
3071       __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3072                   i.InputSimd128Register(1), i.TempSimd128Register(0),
3073                   kScratchDoubleReg);
3074       break;
3075     }
3076     case kX64I64x2Eq: {
3077       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3078       ASSEMBLE_SIMD_BINOP(pcmpeqq);
3079       break;
3080     }
3081     case kX64I64x2Ne: {
3082       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
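      // There is no pcmpneqq; compute equality and invert it by XORing with
      // all-ones.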
3083       __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
3084       __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
3085       __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3086       break;
3087     }
3088     case kX64I64x2GtS: {
3089       __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3090                   i.InputSimd128Register(1), kScratchDoubleReg);
3091       break;
3092     }
3093     case kX64I64x2GeS: {
3094       __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3095                   i.InputSimd128Register(1), kScratchDoubleReg);
3096       break;
3097     }
3098     case kX64I64x2ShrU: {
3099       // Take shift value modulo 2^6.
3100       ASSEMBLE_SIMD_SHIFT(psrlq, 6);
3101       break;
3102     }
3103     case kX64I64x2ExtMulLowI32x4S: {
3104       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3105                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3106                      /*is_signed=*/true);
3107       break;
3108     }
3109     case kX64I64x2ExtMulHighI32x4S: {
3110       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3111                      i.InputSimd128Register(1), kScratchDoubleReg,
3112                      /*low=*/false,
3113                      /*is_signed=*/true);
3114       break;
3115     }
3116     case kX64I64x2ExtMulLowI32x4U: {
3117       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3118                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3119                      /*is_signed=*/false);
3120       break;
3121     }
3122     case kX64I64x2ExtMulHighI32x4U: {
3123       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3124                      i.InputSimd128Register(1), kScratchDoubleReg,
3125                      /*low=*/false,
3126                      /*is_signed=*/false);
3127       break;
3128     }
3129     case kX64I64x2SConvertI32x4Low: {
3130       __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3131       break;
3132     }
3133     case kX64I64x2SConvertI32x4High: {
3134       __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
3135                                 i.InputSimd128Register(0));
3136       break;
3137     }
3138     case kX64I64x2UConvertI32x4Low: {
3139       __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3140       break;
3141     }
3142     case kX64I64x2UConvertI32x4High: {
3143       __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
3144                                 i.InputSimd128Register(0), kScratchDoubleReg);
3145       break;
3146     }
3147     case kX64I32x4Splat: {
3148       XMMRegister dst = i.OutputSimd128Register();
3149       if (HasRegisterInput(instr, 0)) {
3150         __ Movd(dst, i.InputRegister(0));
3151       } else {
3152         // TODO(v8:9198): Pshufd can load from aligned memory once supported.
3153         __ Movd(dst, i.InputOperand(0));
3154       }
3155       __ Pshufd(dst, dst, uint8_t{0x0});
3156       break;
3157     }
3158     case kX64I32x4ExtractLane: {
3159       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3160       break;
3161     }
3162     case kX64I32x4SConvertF32x4: {
3163       __ I32x4SConvertF32x4(i.OutputSimd128Register(),
3164                             i.InputSimd128Register(0), kScratchDoubleReg,
3165                             kScratchRegister);
3166       break;
3167     }
3168     case kX64I32x4SConvertI16x8Low: {
3169       __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3170       break;
3171     }
3172     case kX64I32x4SConvertI16x8High: {
3173       __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
3174                                 i.InputSimd128Register(0));
3175       break;
3176     }
3177     case kX64I32x4Neg: {
3178       XMMRegister dst = i.OutputSimd128Register();
3179       XMMRegister src = i.InputSimd128Register(0);
3180       if (dst == src) {
3181         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3182         __ Psignd(dst, kScratchDoubleReg);
3183       } else {
3184         __ Pxor(dst, dst);
3185         __ Psubd(dst, src);
3186       }
3187       break;
3188     }
3189     case kX64I32x4Shl: {
3190       // Take shift value modulo 2^5.
3191       ASSEMBLE_SIMD_SHIFT(pslld, 5);
3192       break;
3193     }
3194     case kX64I32x4ShrS: {
3195       // Take shift value modulo 2^5.
3196       ASSEMBLE_SIMD_SHIFT(psrad, 5);
3197       break;
3198     }
3199     case kX64I32x4Add: {
3200       ASSEMBLE_SIMD_BINOP(paddd);
3201       break;
3202     }
3203     case kX64I32x4Sub: {
3204       ASSEMBLE_SIMD_BINOP(psubd);
3205       break;
3206     }
3207     case kX64I32x4Mul: {
3208       ASSEMBLE_SIMD_BINOP(pmulld);
3209       break;
3210     }
3211     case kX64I32x4MinS: {
3212       ASSEMBLE_SIMD_BINOP(pminsd);
3213       break;
3214     }
3215     case kX64I32x4MaxS: {
3216       ASSEMBLE_SIMD_BINOP(pmaxsd);
3217       break;
3218     }
3219     case kX64I32x4Eq: {
3220       ASSEMBLE_SIMD_BINOP(pcmpeqd);
3221       break;
3222     }
3223     case kX64I32x4Ne: {
3224       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3225       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3226       __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3227       break;
3228     }
3229     case kX64I32x4GtS: {
3230       ASSEMBLE_SIMD_BINOP(pcmpgtd);
3231       break;
3232     }
3233     case kX64I32x4GeS: {
3234       XMMRegister dst = i.OutputSimd128Register();
3235       XMMRegister src = i.InputSimd128Register(1);
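      // Signed a >= b is computed as min(a, b) == b.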
3236       __ Pminsd(dst, src);
3237       __ Pcmpeqd(dst, src);
3238       break;
3239     }
3240     case kX64I32x4UConvertF32x4: {
3241       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3242       XMMRegister dst = i.OutputSimd128Register();
3243       XMMRegister tmp = i.TempSimd128Register(0);
3244       XMMRegister tmp2 = i.TempSimd128Register(1);
3245       // NaN -> 0, negative -> 0
3246       __ Pxor(tmp2, tmp2);
3247       __ Maxps(dst, tmp2);
3248       // tmp2: float representation of max_signed
3249       __ Pcmpeqd(tmp2, tmp2);
3250       __ Psrld(tmp2, uint8_t{1});  // 0x7fffffff
3251       __ Cvtdq2ps(tmp2, tmp2);     // 0x4f000000
3252       // tmp: convert (src-max_signed).
3253       // Positive overflow lanes -> 0x7FFFFFFF
3254       // Negative lanes -> 0
3255       __ Movaps(tmp, dst);
3256       __ Subps(tmp, tmp2);
3257       __ Cmpleps(tmp2, tmp);
3258       __ Cvttps2dq(tmp, tmp);
3259       __ Pxor(tmp, tmp2);
3260       __ Pxor(tmp2, tmp2);
3261       __ Pmaxsd(tmp, tmp2);
3262       // Convert. Overflow lanes above max_signed become 0x80000000.
3263       __ Cvttps2dq(dst, dst);
3264       // Add (src-max_signed) for overflow lanes.
3265       __ Paddd(dst, tmp);
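      // e.g. a lane holding 3.0e9f: Cvttps2dq(dst) overflows to 0x80000000,
      // tmp ends up as int32_t(3.0e9f - 2147483648.0f) == 852516352, and the
      // final Paddd wraps around to exactly 3000000000.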
3266       break;
3267     }
3268     case kX64I32x4UConvertI16x8Low: {
3269       __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3270       break;
3271     }
3272     case kX64I32x4UConvertI16x8High: {
3273       __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
3274                                 i.InputSimd128Register(0), kScratchDoubleReg);
3275       break;
3276     }
3277     case kX64I32x4ShrU: {
3278       // Take shift value modulo 2^5.
3279       ASSEMBLE_SIMD_SHIFT(psrld, 5);
3280       break;
3281     }
3282     case kX64I32x4MinU: {
3283       ASSEMBLE_SIMD_BINOP(pminud);
3284       break;
3285     }
3286     case kX64I32x4MaxU: {
3287       ASSEMBLE_SIMD_BINOP(pmaxud);
3288       break;
3289     }
3290     case kX64I32x4GtU: {
3291       XMMRegister dst = i.OutputSimd128Register();
3292       XMMRegister src = i.InputSimd128Register(1);
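      // There is no unsigned pcmpgtd; unsigned a > b is max(a, b) == b
      // (i.e. a <= b) inverted by XORing with all-ones.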
3293       __ Pmaxud(dst, src);
3294       __ Pcmpeqd(dst, src);
3295       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3296       __ Pxor(dst, kScratchDoubleReg);
3297       break;
3298     }
3299     case kX64I32x4GeU: {
3300       XMMRegister dst = i.OutputSimd128Register();
3301       XMMRegister src = i.InputSimd128Register(1);
3302       __ Pminud(dst, src);
3303       __ Pcmpeqd(dst, src);
3304       break;
3305     }
3306     case kX64I32x4Abs: {
3307       __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3308       break;
3309     }
3310     case kX64I32x4BitMask: {
3311       __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
3312       break;
3313     }
3314     case kX64I32x4DotI16x8S: {
3315       ASSEMBLE_SIMD_BINOP(pmaddwd);
3316       break;
3317     }
3318     case kX64I32x4ExtAddPairwiseI16x8S: {
3319       __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
3320                                    i.InputSimd128Register(0), kScratchRegister);
3321       break;
3322     }
3323     case kX64I32x4ExtAddPairwiseI16x8U: {
3324       __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
3325                                    i.InputSimd128Register(0),
3326                                    kScratchDoubleReg);
3327       break;
3328     }
3329     case kX64S128Const: {
3330       // Emit code for generic constants here; the all-zeros and all-ones
3331       // cases are handled separately by the instruction selector.
3332       XMMRegister dst = i.OutputSimd128Register();
3333       uint32_t imm[4] = {};
3334       for (int j = 0; j < 4; j++) {
3335         imm[j] = i.InputUint32(j);
3336       }
3337       SetupSimdImmediateInRegister(tasm(), imm, dst);
3338       break;
3339     }
3340     case kX64S128Zero: {
3341       XMMRegister dst = i.OutputSimd128Register();
3342       __ Pxor(dst, dst);
3343       break;
3344     }
3345     case kX64S128AllOnes: {
3346       XMMRegister dst = i.OutputSimd128Register();
3347       __ Pcmpeqd(dst, dst);
3348       break;
3349     }
3350     case kX64I16x8Splat: {
3351       XMMRegister dst = i.OutputSimd128Register();
3352       if (HasRegisterInput(instr, 0)) {
3353         __ I16x8Splat(dst, i.InputRegister(0));
3354       } else {
3355         __ I16x8Splat(dst, i.InputOperand(0));
3356       }
3357       break;
3358     }
3359     case kX64I16x8ExtractLaneS: {
3360       Register dst = i.OutputRegister();
3361       __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
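      // Pextrw zero-extends the 16-bit lane; movsxwl below redoes the
      // extension as signed.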
3362       __ movsxwl(dst, dst);
3363       break;
3364     }
3365     case kX64I16x8SConvertI8x16Low: {
3366       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3367       break;
3368     }
3369     case kX64I16x8SConvertI8x16High: {
3370       __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
3371                                 i.InputSimd128Register(0));
3372       break;
3373     }
3374     case kX64I16x8Neg: {
3375       XMMRegister dst = i.OutputSimd128Register();
3376       XMMRegister src = i.InputSimd128Register(0);
3377       if (dst == src) {
3378         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3379         __ Psignw(dst, kScratchDoubleReg);
3380       } else {
3381         __ Pxor(dst, dst);
3382         __ Psubw(dst, src);
3383       }
3384       break;
3385     }
3386     case kX64I16x8Shl: {
3387       // Take shift value modulo 2^4.
3388       ASSEMBLE_SIMD_SHIFT(psllw, 4);
3389       break;
3390     }
3391     case kX64I16x8ShrS: {
3392       // Take shift value modulo 2^4.
3393       ASSEMBLE_SIMD_SHIFT(psraw, 4);
3394       break;
3395     }
3396     case kX64I16x8SConvertI32x4: {
3397       ASSEMBLE_SIMD_BINOP(packssdw);
3398       break;
3399     }
3400     case kX64I16x8Add: {
3401       ASSEMBLE_SIMD_BINOP(paddw);
3402       break;
3403     }
3404     case kX64I16x8AddSatS: {
3405       ASSEMBLE_SIMD_BINOP(paddsw);
3406       break;
3407     }
3408     case kX64I16x8Sub: {
3409       ASSEMBLE_SIMD_BINOP(psubw);
3410       break;
3411     }
3412     case kX64I16x8SubSatS: {
3413       ASSEMBLE_SIMD_BINOP(psubsw);
3414       break;
3415     }
3416     case kX64I16x8Mul: {
3417       ASSEMBLE_SIMD_BINOP(pmullw);
3418       break;
3419     }
3420     case kX64I16x8MinS: {
3421       ASSEMBLE_SIMD_BINOP(pminsw);
3422       break;
3423     }
3424     case kX64I16x8MaxS: {
3425       ASSEMBLE_SIMD_BINOP(pmaxsw);
3426       break;
3427     }
3428     case kX64I16x8Eq: {
3429       ASSEMBLE_SIMD_BINOP(pcmpeqw);
3430       break;
3431     }
3432     case kX64I16x8Ne: {
3433       XMMRegister dst = i.OutputSimd128Register();
3434       __ Pcmpeqw(dst, i.InputSimd128Register(1));
3435       __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3436       __ Pxor(dst, kScratchDoubleReg);
3437       break;
3438     }
3439     case kX64I16x8GtS: {
3440       ASSEMBLE_SIMD_BINOP(pcmpgtw);
3441       break;
3442     }
3443     case kX64I16x8GeS: {
3444       XMMRegister dst = i.OutputSimd128Register();
3445       XMMRegister src = i.InputSimd128Register(1);
3446       __ Pminsw(dst, src);
3447       __ Pcmpeqw(dst, src);
3448       break;
3449     }
3450     case kX64I16x8UConvertI8x16Low: {
3451       __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3452       break;
3453     }
3454     case kX64I16x8UConvertI8x16High: {
3455       __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
3456                                 i.InputSimd128Register(0), kScratchDoubleReg);
3457       break;
3458     }
3459     case kX64I16x8ShrU: {
3460       // Take shift value modulo 2^4.
3461       ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3462       break;
3463     }
3464     case kX64I16x8UConvertI32x4: {
3465       ASSEMBLE_SIMD_BINOP(packusdw);
3466       break;
3467     }
3468     case kX64I16x8AddSatU: {
3469       ASSEMBLE_SIMD_BINOP(paddusw);
3470       break;
3471     }
3472     case kX64I16x8SubSatU: {
3473       ASSEMBLE_SIMD_BINOP(psubusw);
3474       break;
3475     }
3476     case kX64I16x8MinU: {
3477       ASSEMBLE_SIMD_BINOP(pminuw);
3478       break;
3479     }
3480     case kX64I16x8MaxU: {
3481       ASSEMBLE_SIMD_BINOP(pmaxuw);
3482       break;
3483     }
3484     case kX64I16x8GtU: {
3485       XMMRegister dst = i.OutputSimd128Register();
3486       XMMRegister src = i.InputSimd128Register(1);
3487       __ Pmaxuw(dst, src);
3488       __ Pcmpeqw(dst, src);
3489       __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3490       __ Pxor(dst, kScratchDoubleReg);
3491       break;
3492     }
3493     case kX64I16x8GeU: {
3494       XMMRegister dst = i.OutputSimd128Register();
3495       XMMRegister src = i.InputSimd128Register(1);
3496       __ Pminuw(dst, src);
3497       __ Pcmpeqw(dst, src);
3498       break;
3499     }
3500     case kX64I16x8RoundingAverageU: {
3501       ASSEMBLE_SIMD_BINOP(pavgw);
3502       break;
3503     }
3504     case kX64I16x8Abs: {
3505       __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3506       break;
3507     }
3508     case kX64I16x8BitMask: {
3509       Register dst = i.OutputRegister();
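      // Packsswb (signed saturation preserves the sign bits) packs the eight
      // input words into the upper eight bytes of kScratchDoubleReg; Pmovmskb
      // then gathers sixteen byte sign bits, and the shift by 8 keeps the
      // eight that came from the input.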
3510       __ Packsswb(kScratchDoubleReg, i.InputSimd128Register(0));
3511       __ Pmovmskb(dst, kScratchDoubleReg);
3512       __ shrq(dst, Immediate(8));
3513       break;
3514     }
3515     case kX64I16x8ExtMulLowI8x16S: {
3516       __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3517                         i.InputSimd128Register(1), kScratchDoubleReg,
3518                         /*is_signed=*/true);
3519       break;
3520     }
3521     case kX64I16x8ExtMulHighI8x16S: {
3522       __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3523                           i.InputSimd128Register(1), kScratchDoubleReg);
3524       break;
3525     }
3526     case kX64I16x8ExtMulLowI8x16U: {
3527       __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3528                         i.InputSimd128Register(1), kScratchDoubleReg,
3529                         /*is_signed=*/false);
3530       break;
3531     }
3532     case kX64I16x8ExtMulHighI8x16U: {
3533       __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
3534                           i.InputSimd128Register(1), kScratchDoubleReg);
3535       break;
3536     }
3537     case kX64I16x8ExtAddPairwiseI8x16S: {
3538       __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
3539                                    i.InputSimd128Register(0), kScratchDoubleReg,
3540                                    kScratchRegister);
3541       break;
3542     }
3543     case kX64I16x8ExtAddPairwiseI8x16U: {
3544       __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
3545                                    i.InputSimd128Register(0), kScratchRegister);
3546       break;
3547     }
3548     case kX64I16x8Q15MulRSatS: {
3549       __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3550                           i.InputSimd128Register(1), kScratchDoubleReg);
3551       break;
3552     }
3553     case kX64I8x16Splat: {
3554       XMMRegister dst = i.OutputSimd128Register();
3555       if (HasRegisterInput(instr, 0)) {
3556         __ I8x16Splat(dst, i.InputRegister(0), kScratchDoubleReg);
3557       } else {
3558         __ I8x16Splat(dst, i.InputOperand(0), kScratchDoubleReg);
3559       }
3560       break;
3561     }
3562     case kX64Pextrb: {
3563       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3564       size_t index = 0;
3565       if (HasAddressingMode(instr)) {
3566         Operand operand = i.MemoryOperand(&index);
3567         __ Pextrb(operand, i.InputSimd128Register(index),
3568                   i.InputUint8(index + 1));
3569       } else {
3570         __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3571                   i.InputUint8(1));
3572       }
3573       break;
3574     }
3575     case kX64Pextrw: {
3576       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3577       size_t index = 0;
3578       if (HasAddressingMode(instr)) {
3579         Operand operand = i.MemoryOperand(&index);
3580         __ Pextrw(operand, i.InputSimd128Register(index),
3581                   i.InputUint8(index + 1));
3582       } else {
3583         __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3584                   i.InputUint8(1));
3585       }
3586       break;
3587     }
3588     case kX64I8x16ExtractLaneS: {
3589       Register dst = i.OutputRegister();
3590       __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3591       __ movsxbl(dst, dst);
3592       break;
3593     }
3594     case kX64Pinsrb: {
3595       ASSEMBLE_PINSR(Pinsrb);
3596       break;
3597     }
3598     case kX64Pinsrw: {
3599       ASSEMBLE_PINSR(Pinsrw);
3600       break;
3601     }
3602     case kX64Pinsrd: {
3603       ASSEMBLE_PINSR(Pinsrd);
3604       break;
3605     }
3606     case kX64Pinsrq: {
3607       ASSEMBLE_PINSR(Pinsrq);
3608       break;
3609     }
3610     case kX64I8x16SConvertI16x8: {
3611       ASSEMBLE_SIMD_BINOP(packsswb);
3612       break;
3613     }
3614     case kX64I8x16Neg: {
3615       XMMRegister dst = i.OutputSimd128Register();
3616       XMMRegister src = i.InputSimd128Register(0);
3617       if (dst == src) {
3618         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3619         __ Psignb(dst, kScratchDoubleReg);
3620       } else {
3621         __ Pxor(dst, dst);
3622         __ Psubb(dst, src);
3623       }
3624       break;
3625     }
3626     case kX64I8x16Shl: {
3627       XMMRegister dst = i.OutputSimd128Register();
3628       XMMRegister src = i.InputSimd128Register(0);
3629       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3630       if (HasImmediateInput(instr, 1)) {
3631         __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
3632                     kScratchDoubleReg);
3633       } else {
3634         __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
3635                     kScratchDoubleReg, i.TempSimd128Register(0));
3636       }
3637       break;
3638     }
3639     case kX64I8x16ShrS: {
3640       XMMRegister dst = i.OutputSimd128Register();
3641       XMMRegister src = i.InputSimd128Register(0);
3642       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3643       if (HasImmediateInput(instr, 1)) {
3644         __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
3645       } else {
3646         __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
3647                      kScratchDoubleReg, i.TempSimd128Register(0));
3648       }
3649       break;
3650     }
3651     case kX64I8x16Add: {
3652       ASSEMBLE_SIMD_BINOP(paddb);
3653       break;
3654     }
3655     case kX64I8x16AddSatS: {
3656       ASSEMBLE_SIMD_BINOP(paddsb);
3657       break;
3658     }
3659     case kX64I8x16Sub: {
3660       ASSEMBLE_SIMD_BINOP(psubb);
3661       break;
3662     }
3663     case kX64I8x16SubSatS: {
3664       ASSEMBLE_SIMD_BINOP(psubsb);
3665       break;
3666     }
3667     case kX64I8x16MinS: {
3668       ASSEMBLE_SIMD_BINOP(pminsb);
3669       break;
3670     }
3671     case kX64I8x16MaxS: {
3672       ASSEMBLE_SIMD_BINOP(pmaxsb);
3673       break;
3674     }
3675     case kX64I8x16Eq: {
3676       ASSEMBLE_SIMD_BINOP(pcmpeqb);
3677       break;
3678     }
3679     case kX64I8x16Ne: {
3680       XMMRegister dst = i.OutputSimd128Register();
3681       __ Pcmpeqb(dst, i.InputSimd128Register(1));
3682       __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3683       __ Pxor(dst, kScratchDoubleReg);
3684       break;
3685     }
3686     case kX64I8x16GtS: {
3687       ASSEMBLE_SIMD_BINOP(pcmpgtb);
3688       break;
3689     }
3690     case kX64I8x16GeS: {
3691       XMMRegister dst = i.OutputSimd128Register();
3692       XMMRegister src = i.InputSimd128Register(1);
3693       __ Pminsb(dst, src);
3694       __ Pcmpeqb(dst, src);
3695       break;
3696     }
3697     case kX64I8x16UConvertI16x8: {
3698       ASSEMBLE_SIMD_BINOP(packuswb);
3699       break;
3700     }
3701     case kX64I8x16ShrU: {
3702       XMMRegister dst = i.OutputSimd128Register();
3703       XMMRegister src = i.InputSimd128Register(0);
3704       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3705       if (HasImmediateInput(instr, 1)) {
3706         __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
3707                      kScratchDoubleReg);
3708       } else {
3709         __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
3710                      kScratchDoubleReg, i.TempSimd128Register(0));
3711       }
3712       break;
3713     }
3714     case kX64I8x16AddSatU: {
3715       ASSEMBLE_SIMD_BINOP(paddusb);
3716       break;
3717     }
3718     case kX64I8x16SubSatU: {
3719       ASSEMBLE_SIMD_BINOP(psubusb);
3720       break;
3721     }
3722     case kX64I8x16MinU: {
3723       ASSEMBLE_SIMD_BINOP(pminub);
3724       break;
3725     }
3726     case kX64I8x16MaxU: {
3727       ASSEMBLE_SIMD_BINOP(pmaxub);
3728       break;
3729     }
3730     case kX64I8x16GtU: {
3731       XMMRegister dst = i.OutputSimd128Register();
3732       XMMRegister src = i.InputSimd128Register(1);
3733       __ Pmaxub(dst, src);
3734       __ Pcmpeqb(dst, src);
3735       __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3736       __ Pxor(dst, kScratchDoubleReg);
3737       break;
3738     }
3739     case kX64I8x16GeU: {
3740       XMMRegister dst = i.OutputSimd128Register();
3741       XMMRegister src = i.InputSimd128Register(1);
3742       __ Pminub(dst, src);
3743       __ Pcmpeqb(dst, src);
3744       break;
3745     }
3746     case kX64I8x16RoundingAverageU: {
3747       ASSEMBLE_SIMD_BINOP(pavgb);
3748       break;
3749     }
3750     case kX64I8x16Abs: {
3751       __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3752       break;
3753     }
3754     case kX64I8x16BitMask: {
3755       __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3756       break;
3757     }
3758     case kX64I32x4ExtMulLowI16x8S: {
3759       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3760                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3761                      /*is_signed=*/true);
3762       break;
3763     }
3764     case kX64I32x4ExtMulHighI16x8S: {
3765       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3766                      i.InputSimd128Register(1), kScratchDoubleReg,
3767                      /*low=*/false,
3768                      /*is_signed=*/true);
3769       break;
3770     }
3771     case kX64I32x4ExtMulLowI16x8U: {
3772       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3773                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3774                      /*is_signed=*/false);
3775       break;
3776     }
3777     case kX64I32x4ExtMulHighI16x8U: {
3778       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3779                      i.InputSimd128Register(1), kScratchDoubleReg,
3780                      /*low=*/false,
3781                      /*is_signed=*/false);
3782       break;
3783     }
3784     case kX64S128And: {
3785       ASSEMBLE_SIMD_BINOP(pand);
3786       break;
3787     }
3788     case kX64S128Or: {
3789       ASSEMBLE_SIMD_BINOP(por);
3790       break;
3791     }
3792     case kX64S128Xor: {
3793       ASSEMBLE_SIMD_BINOP(pxor);
3794       break;
3795     }
3796     case kX64S128Not: {
3797       __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
3798                  kScratchDoubleReg);
3799       break;
3800     }
3801     case kX64S128Select: {
3802       __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
3803                     i.InputSimd128Register(1), i.InputSimd128Register(2),
3804                     kScratchDoubleReg);
3805       break;
3806     }
3807     case kX64S128AndNot: {
3808       XMMRegister dst = i.OutputSimd128Register();
3809       DCHECK_EQ(dst, i.InputSimd128Register(0));
3810       // The inputs have been inverted by instruction selector, so we can call
3811       // andnps here without any modifications.
3812       __ Andnps(dst, i.InputSimd128Register(1));
3813       break;
3814     }
3815     case kX64I8x16Swizzle: {
3816       __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
3817                       i.InputSimd128Register(1), kScratchDoubleReg,
3818                       kScratchRegister, MiscField::decode(instr->opcode()));
3819       break;
3820     }
3821     case kX64I8x16Shuffle: {
3822       XMMRegister dst = i.OutputSimd128Register();
3823       XMMRegister tmp_simd = i.TempSimd128Register(0);
3824       DCHECK_NE(tmp_simd, i.InputSimd128Register(0));
3825       if (instr->InputCount() == 5) {  // only one input operand
3826         uint32_t mask[4] = {};
3827         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3828         for (int j = 4; j > 0; j--) {
3829           mask[j - 1] = i.InputUint32(j);
3830         }
3831 
3832         SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3833         __ Pshufb(dst, tmp_simd);
3834       } else {  // two input operands
3835         DCHECK_NE(tmp_simd, i.InputSimd128Register(1));
3836         DCHECK_EQ(6, instr->InputCount());
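        // Each shuffle index selects a byte from the 32-byte concatenation of
        // the two inputs. mask1 keeps indices < 16 (bytes from input 0) and
        // maps the rest to 0x80, which pshufb turns into zero; mask2 does the
        // mirror image for input 1 (indices 16..31, reduced mod 16). The two
        // partial results are then combined with Por.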
3837         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 0);
3838         uint32_t mask1[4] = {};
3839         for (int j = 5; j > 1; j--) {
3840           uint32_t lanes = i.InputUint32(j);
3841           for (int k = 0; k < 32; k += 8) {
3842             uint8_t lane = lanes >> k;
3843             mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3844           }
3845         }
3846         SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3847         __ Pshufb(kScratchDoubleReg, tmp_simd);
3848         uint32_t mask2[4] = {};
3849         if (instr->InputAt(1)->IsSimd128Register()) {
3850           XMMRegister src1 = i.InputSimd128Register(1);
3851           if (src1 != dst) __ Movdqa(dst, src1);
3852         } else {
3853           __ Movdqu(dst, i.InputOperand(1));
3854         }
3855         for (int j = 5; j > 1; j--) {
3856           uint32_t lanes = i.InputUint32(j);
3857           for (int k = 0; k < 32; k += 8) {
3858             uint8_t lane = lanes >> k;
3859             mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3860           }
3861         }
3862         SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3863         __ Pshufb(dst, tmp_simd);
3864         __ Por(dst, kScratchDoubleReg);
3865       }
3866       break;
3867     }
3868     case kX64I8x16Popcnt: {
3869       __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
3870                      i.TempSimd128Register(0), kScratchDoubleReg,
3871                      kScratchRegister);
3872       break;
3873     }
3874     case kX64S128Load8Splat: {
3875       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3876       __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3877                         kScratchDoubleReg);
3878       break;
3879     }
3880     case kX64S128Load16Splat: {
3881       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3882       __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3883                          kScratchDoubleReg);
3884       break;
3885     }
3886     case kX64S128Load32Splat: {
3887       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3888       __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
3889       break;
3890     }
3891     case kX64S128Load64Splat: {
3892       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3893       __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3894       break;
3895     }
3896     case kX64S128Load8x8S: {
3897       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3898       __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3899       break;
3900     }
3901     case kX64S128Load8x8U: {
3902       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3903       __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3904       break;
3905     }
3906     case kX64S128Load16x4S: {
3907       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3908       __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3909       break;
3910     }
3911     case kX64S128Load16x4U: {
3912       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3913       __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3914       break;
3915     }
3916     case kX64S128Load32x2S: {
3917       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3918       __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3919       break;
3920     }
3921     case kX64S128Load32x2U: {
3922       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3923       __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3924       break;
3925     }
3926     case kX64S128Store32Lane: {
3927       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3928       size_t index = 0;
3929       Operand operand = i.MemoryOperand(&index);
3930       uint8_t lane = i.InputUint8(index + 1);
3931       __ S128Store32Lane(operand, i.InputSimd128Register(index), lane);
3932       break;
3933     }
3934     case kX64S128Store64Lane: {
3935       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3936       size_t index = 0;
3937       Operand operand = i.MemoryOperand(&index);
3938       uint8_t lane = i.InputUint8(index + 1);
3939       __ S128Store64Lane(operand, i.InputSimd128Register(index), lane);
3940       break;
3941     }
3942     case kX64Shufps: {
3943       __ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3944                 i.InputSimd128Register(1), i.InputUint8(2));
3945       break;
3946     }
3947     case kX64S32x4Rotate: {
3948       XMMRegister dst = i.OutputSimd128Register();
3949       XMMRegister src = i.InputSimd128Register(0);
3950       uint8_t mask = i.InputUint8(1);
3951       if (dst == src) {
3952         // 1-byte shorter encoding than pshufd.
3953         __ Shufps(dst, src, src, mask);
3954       } else {
3955         __ Pshufd(dst, src, mask);
3956       }
3957       break;
3958     }
3959     case kX64S32x4Swizzle: {
3960       DCHECK_EQ(2, instr->InputCount());
3961       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3962                               i.InputUint8(1));
3963       break;
3964     }
3965     case kX64S32x4Shuffle: {
3966       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3967       uint8_t shuffle = i.InputUint8(2);
3968       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3969       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3970       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3971       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3972       break;
3973     }
3974     case kX64S16x8Blend: {
3975       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
3976       break;
3977     }
3978     case kX64S16x8HalfShuffle1: {
3979       XMMRegister dst = i.OutputSimd128Register();
3980       uint8_t mask_lo = i.InputUint8(1);
3981       uint8_t mask_hi = i.InputUint8(2);
3982       if (mask_lo != 0xe4) {
3983         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, mask_lo);
3984         if (mask_hi != 0xe4) __ Pshufhw(dst, dst, mask_hi);
3985       } else {
3986         DCHECK_NE(mask_hi, 0xe4);
3987         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, mask_hi);
3988       }
3989       break;
3990     }
3991     case kX64S16x8HalfShuffle2: {
3992       XMMRegister dst = i.OutputSimd128Register();
3993       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3994       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3995       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3996       __ Pshufhw(dst, dst, i.InputUint8(3));
3997       __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3998       break;
3999     }
4000     case kX64S8x16Alignr: {
4001       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
4002       break;
4003     }
4004     case kX64S16x8Dup: {
4005       XMMRegister dst = i.OutputSimd128Register();
4006       uint8_t lane = i.InputInt8(1) & 0x7;
4007       uint8_t lane4 = lane & 0x3;
4008       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
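      // Illustrative example (values assumed, not taken from a real
      // instruction): lane = 6 -> lane4 = 2, so half_dup =
      // 2 | (2 << 2) | (2 << 4) | (2 << 6) = 0xAA, i.e. every position in the
      // pshufhw immediate selects word 2 of the high half; the punpckhqdq
      // below then copies that half into both quadwords.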
4009       if (lane < 4) {
4010         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
4011         __ Punpcklqdq(dst, dst);
4012       } else {
4013         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
4014         __ Punpckhqdq(dst, dst);
4015       }
4016       break;
4017     }
4018     case kX64S8x16Dup: {
4019       XMMRegister dst = i.OutputSimd128Register();
4020       uint8_t lane = i.InputInt8(1) & 0xf;
4021       DCHECK_EQ(dst, i.InputSimd128Register(0));
4022       if (lane < 8) {
4023         __ Punpcklbw(dst, dst);
4024       } else {
4025         __ Punpckhbw(dst, dst);
4026       }
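      // The unpack above interleaves dst with itself, so the requested byte
      // now fills a full 16-bit lane; the rest of the sequence mirrors
      // kX64S16x8Dup on that widened lane.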
4027       lane &= 0x7;
4028       uint8_t lane4 = lane & 0x3;
4029       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
4030       if (lane < 4) {
4031         __ Pshuflw(dst, dst, half_dup);
4032         __ Punpcklqdq(dst, dst);
4033       } else {
4034         __ Pshufhw(dst, dst, half_dup);
4035         __ Punpckhqdq(dst, dst);
4036       }
4037       break;
4038     }
4039     case kX64S64x2UnpackHigh:
4040       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
4041       break;
4042     case kX64S32x4UnpackHigh:
4043       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
4044       break;
4045     case kX64S16x8UnpackHigh:
4046       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
4047       break;
4048     case kX64S8x16UnpackHigh:
4049       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
4050       break;
4051     case kX64S64x2UnpackLow:
4052       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
4053       break;
4054     case kX64S32x4UnpackLow:
4055       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
4056       break;
4057     case kX64S16x8UnpackLow:
4058       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
4059       break;
4060     case kX64S8x16UnpackLow:
4061       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
4062       break;
4063     case kX64S16x8UnzipHigh: {
4064       XMMRegister dst = i.OutputSimd128Register();
4065       XMMRegister src2 = dst;
4066       DCHECK_EQ(dst, i.InputSimd128Register(0));
4067       if (instr->InputCount() == 2) {
4068         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4069         __ Psrld(kScratchDoubleReg, byte{16});
4070         src2 = kScratchDoubleReg;
4071       }
4072       __ Psrld(dst, byte{16});
4073       __ Packusdw(dst, src2);
4074       break;
4075     }
4076     case kX64S16x8UnzipLow: {
4077       XMMRegister dst = i.OutputSimd128Register();
4078       XMMRegister src2 = dst;
4079       DCHECK_EQ(dst, i.InputSimd128Register(0));
4080       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
4081       if (instr->InputCount() == 2) {
4082         ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
4083         src2 = kScratchDoubleReg;
4084       }
4085       __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
4086       __ Packusdw(dst, src2);
4087       break;
4088     }
4089     case kX64S8x16UnzipHigh: {
4090       XMMRegister dst = i.OutputSimd128Register();
4091       XMMRegister src2 = dst;
4092       DCHECK_EQ(dst, i.InputSimd128Register(0));
4093       if (instr->InputCount() == 2) {
4094         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4095         __ Psrlw(kScratchDoubleReg, byte{8});
4096         src2 = kScratchDoubleReg;
4097       }
4098       __ Psrlw(dst, byte{8});
4099       __ Packuswb(dst, src2);
4100       break;
4101     }
4102     case kX64S8x16UnzipLow: {
4103       XMMRegister dst = i.OutputSimd128Register();
4104       XMMRegister src2 = dst;
4105       DCHECK_EQ(dst, i.InputSimd128Register(0));
4106       if (instr->InputCount() == 2) {
4107         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4108         __ Psllw(kScratchDoubleReg, byte{8});
4109         __ Psrlw(kScratchDoubleReg, byte{8});
4110         src2 = kScratchDoubleReg;
4111       }
4112       __ Psllw(dst, byte{8});
4113       __ Psrlw(dst, byte{8});
4114       __ Packuswb(dst, src2);
4115       break;
4116     }
4117     case kX64S8x16TransposeLow: {
4118       XMMRegister dst = i.OutputSimd128Register();
4119       DCHECK_EQ(dst, i.InputSimd128Register(0));
4120       __ Psllw(dst, byte{8});
4121       if (instr->InputCount() == 1) {
4122         __ Movdqa(kScratchDoubleReg, dst);
4123       } else {
4124         DCHECK_EQ(2, instr->InputCount());
4125         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4126         __ Psllw(kScratchDoubleReg, byte{8});
4127       }
4128       __ Psrlw(dst, byte{8});
4129       __ Por(dst, kScratchDoubleReg);
4130       break;
4131     }
4132     case kX64S8x16TransposeHigh: {
4133       XMMRegister dst = i.OutputSimd128Register();
4134       DCHECK_EQ(dst, i.InputSimd128Register(0));
4135       __ Psrlw(dst, byte{8});
4136       if (instr->InputCount() == 1) {
4137         __ Movdqa(kScratchDoubleReg, dst);
4138       } else {
4139         DCHECK_EQ(2, instr->InputCount());
4140         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4141         __ Psrlw(kScratchDoubleReg, byte{8});
4142       }
4143       __ Psllw(kScratchDoubleReg, byte{8});
4144       __ Por(dst, kScratchDoubleReg);
4145       break;
4146     }
4147     case kX64S8x8Reverse:
4148     case kX64S8x4Reverse:
4149     case kX64S8x2Reverse: {
4150       DCHECK_EQ(1, instr->InputCount());
4151       XMMRegister dst = i.OutputSimd128Register();
4152       DCHECK_EQ(dst, i.InputSimd128Register(0));
4153       if (arch_opcode != kX64S8x2Reverse) {
4154         // First shuffle words into position.
4155         uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
4156         __ Pshuflw(dst, dst, shuffle_mask);
4157         __ Pshufhw(dst, dst, shuffle_mask);
4158       }
4159       __ Movdqa(kScratchDoubleReg, dst);
4160       __ Psrlw(kScratchDoubleReg, byte{8});
4161       __ Psllw(dst, byte{8});
4162       __ Por(dst, kScratchDoubleReg);
4163       break;
4164     }
4165     case kX64V128AnyTrue: {
4166       Register dst = i.OutputRegister();
4167       XMMRegister src = i.InputSimd128Register(0);
4168 
4169       __ xorq(dst, dst);
4170       __ Ptest(src, src);
4171       __ setcc(not_equal, dst);
4172       break;
4173     }
4174     // Need to split up all the different lane structures because the
4175     // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
4176     // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
4177     // respectively.
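    // Sketch of what ASSEMBLE_SIMD_ALL_TRUE(pcmpeq*) emits (see the macro
    // definition earlier in this file; this is a summary, not the
    // authoritative expansion): compare the input against an all-zero vector
    // with the lane-sized pcmpeq, so any zero lane becomes all-ones; ptest
    // that result and setcc the output to 1 only if it is all-zero, i.e. no
    // lane of the input was zero.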
4178     case kX64I64x2AllTrue: {
4179       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
4180       break;
4181     }
4182     case kX64I32x4AllTrue: {
4183       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
4184       break;
4185     }
4186     case kX64I16x8AllTrue: {
4187       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
4188       break;
4189     }
4190     case kX64I8x16AllTrue: {
4191       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
4192       break;
4193     }
4194     case kX64Pblendvb: {
4195       __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
4196                   i.InputSimd128Register(1), i.InputSimd128Register(2));
4197       break;
4198     }
4199     case kX64I32x4TruncF64x2UZero: {
4200       __ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
4201                               i.InputSimd128Register(0), kScratchRegister,
4202                               kScratchDoubleReg);
4203       break;
4204     }
4205     case kX64I32x4TruncF32x4U: {
4206       __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
4207                           kScratchRegister, kScratchDoubleReg);
4208       break;
4209     }
4210     case kX64Cvttps2dq: {
4211       __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
4212       break;
4213     }
4214     case kX64Cvttpd2dq: {
4215       __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
4216       break;
4217     }
4218     case kAtomicStoreWord8: {
4219       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
4220       break;
4221     }
4222     case kAtomicStoreWord16: {
4223       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord16);
4224       break;
4225     }
4226     case kAtomicStoreWord32: {
4227       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord32);
4228       break;
4229     }
4230     case kX64Word64AtomicStoreWord64: {
4231       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord64);
4232       break;
4233     }
4234     case kAtomicExchangeInt8: {
4235       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
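      // xchg with a memory operand is implicitly locked on x64, so no lock
      // prefix is needed; the movsx below sign-extends the old byte value
      // into the full 32-bit result register.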
4236       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4237       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
4238       break;
4239     }
4240     case kAtomicExchangeUint8: {
4241       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4242       switch (AtomicWidthField::decode(opcode)) {
4243         case AtomicWidth::kWord32:
4244           __ movzxbl(i.InputRegister(0), i.InputRegister(0));
4245           break;
4246         case AtomicWidth::kWord64:
4247           __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4248           break;
4249       }
4250       break;
4251     }
4252     case kAtomicExchangeInt16: {
4253       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4254       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4255       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
4256       break;
4257     }
4258     case kAtomicExchangeUint16: {
4259       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4260       switch (AtomicWidthField::decode(opcode)) {
4261         case AtomicWidth::kWord32:
4262           __ movzxwl(i.InputRegister(0), i.InputRegister(0));
4263           break;
4264         case AtomicWidth::kWord64:
4265           __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4266           break;
4267       }
4268       break;
4269     }
4270     case kAtomicExchangeWord32: {
4271       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4272       break;
4273     }
4274     case kAtomicCompareExchangeInt8: {
4275       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4276       __ lock();
4277       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4278       __ movsxbl(rax, rax);
4279       break;
4280     }
4281     case kAtomicCompareExchangeUint8: {
4282       __ lock();
4283       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4284       switch (AtomicWidthField::decode(opcode)) {
4285         case AtomicWidth::kWord32:
4286           __ movzxbl(rax, rax);
4287           break;
4288         case AtomicWidth::kWord64:
4289           __ movzxbq(rax, rax);
4290           break;
4291       }
4292       break;
4293     }
4294     case kAtomicCompareExchangeInt16: {
4295       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4296       __ lock();
4297       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4298       __ movsxwl(rax, rax);
4299       break;
4300     }
4301     case kAtomicCompareExchangeUint16: {
4302       __ lock();
4303       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4304       switch (AtomicWidthField::decode(opcode)) {
4305         case AtomicWidth::kWord32:
4306           __ movzxwl(rax, rax);
4307           break;
4308         case AtomicWidth::kWord64:
4309           __ movzxwq(rax, rax);
4310           break;
4311       }
4312       break;
4313     }
4314     case kAtomicCompareExchangeWord32: {
4315       __ lock();
4316       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4317       if (AtomicWidthField::decode(opcode) == AtomicWidth::kWord64) {
4318         // Zero-extend the 32 bit value to 64 bit.
4319         __ movl(rax, rax);
4320       }
4321       break;
4322     }
4323     case kX64Word64AtomicExchangeUint64: {
4324       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4325       break;
4326     }
4327     case kX64Word64AtomicCompareExchangeUint64: {
4328       __ lock();
4329       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4330       break;
4331     }
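// ATOMIC_BINOP_CASE expands into the switch cases for one atomic binary
// operation (Add, Sub, And, Or, Xor) at every supported width. The
// ASSEMBLE_ATOMIC_BINOP/ASSEMBLE_ATOMIC64_BINOP macros (defined earlier in
// this file) emit, roughly, a load + operate + lock cmpxchg retry loop that
// leaves the old value in rax; the movsx/movzx afterwards extend that old
// value to the requested result width.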
4332 #define ATOMIC_BINOP_CASE(op, inst32, inst64)                          \
4333   case kAtomic##op##Int8:                                              \
4334     DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4335     ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb);                     \
4336     __ movsxbl(rax, rax);                                              \
4337     break;                                                             \
4338   case kAtomic##op##Uint8:                                             \
4339     switch (AtomicWidthField::decode(opcode)) {                        \
4340       case AtomicWidth::kWord32:                                       \
4341         ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb);                 \
4342         __ movzxbl(rax, rax);                                          \
4343         break;                                                         \
4344       case AtomicWidth::kWord64:                                       \
4345         ASSEMBLE_ATOMIC64_BINOP(inst64, movb, cmpxchgb);               \
4346         __ movzxbq(rax, rax);                                          \
4347         break;                                                         \
4348     }                                                                  \
4349     break;                                                             \
4350   case kAtomic##op##Int16:                                             \
4351     DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4352     ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw);                     \
4353     __ movsxwl(rax, rax);                                              \
4354     break;                                                             \
4355   case kAtomic##op##Uint16:                                            \
4356     switch (AtomicWidthField::decode(opcode)) {                        \
4357       case AtomicWidth::kWord32:                                       \
4358         ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw);                 \
4359         __ movzxwl(rax, rax);                                          \
4360         break;                                                         \
4361       case AtomicWidth::kWord64:                                       \
4362         ASSEMBLE_ATOMIC64_BINOP(inst64, movw, cmpxchgw);               \
4363         __ movzxwq(rax, rax);                                          \
4364         break;                                                         \
4365     }                                                                  \
4366     break;                                                             \
4367   case kAtomic##op##Word32:                                            \
4368     switch (AtomicWidthField::decode(opcode)) {                        \
4369       case AtomicWidth::kWord32:                                       \
4370         ASSEMBLE_ATOMIC_BINOP(inst32, movl, cmpxchgl);                 \
4371         break;                                                         \
4372       case AtomicWidth::kWord64:                                       \
4373         ASSEMBLE_ATOMIC64_BINOP(inst64, movl, cmpxchgl);               \
4374         break;                                                         \
4375     }                                                                  \
4376     break;                                                             \
4377   case kX64Word64Atomic##op##Uint64:                                   \
4378     ASSEMBLE_ATOMIC64_BINOP(inst64, movq, cmpxchgq);                   \
4379     break;
4380       ATOMIC_BINOP_CASE(Add, addl, addq)
4381       ATOMIC_BINOP_CASE(Sub, subl, subq)
4382       ATOMIC_BINOP_CASE(And, andl, andq)
4383       ATOMIC_BINOP_CASE(Or, orl, orq)
4384       ATOMIC_BINOP_CASE(Xor, xorl, xorq)
4385 #undef ATOMIC_BINOP_CASE
4386 
4387     case kAtomicLoadInt8:
4388     case kAtomicLoadUint8:
4389     case kAtomicLoadInt16:
4390     case kAtomicLoadUint16:
4391     case kAtomicLoadWord32:
4392       UNREACHABLE();  // Won't be generated by instruction selector.
4393   }
4394   return kSuccess;
4395 }  // NOLINT(readability/fn_size)
4396 
4397 #undef ASSEMBLE_PINSR
4398 #undef ASSEMBLE_UNOP
4399 #undef ASSEMBLE_BINOP
4400 #undef ASSEMBLE_COMPARE
4401 #undef ASSEMBLE_MULT
4402 #undef ASSEMBLE_SHIFT
4403 #undef ASSEMBLE_MOVX
4404 #undef ASSEMBLE_SSE_BINOP
4405 #undef ASSEMBLE_SSE_UNOP
4406 #undef ASSEMBLE_AVX_BINOP
4407 #undef ASSEMBLE_IEEE754_BINOP
4408 #undef ASSEMBLE_IEEE754_UNOP
4409 #undef ASSEMBLE_ATOMIC_BINOP
4410 #undef ASSEMBLE_ATOMIC64_BINOP
4411 #undef ASSEMBLE_SIMD_INSTR
4412 #undef ASSEMBLE_SIMD_IMM_INSTR
4413 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4414 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4415 #undef ASSEMBLE_SIMD_ALL_TRUE
4416 #undef ASSEMBLE_SIMD_SHIFT
4417 #undef ASSEMBLE_SEQ_CST_STORE
4418 
4419 namespace {
4420 
4421 Condition FlagsConditionToCondition(FlagsCondition condition) {
4422   switch (condition) {
4423     case kUnorderedEqual:
4424     case kEqual:
4425       return equal;
4426     case kUnorderedNotEqual:
4427     case kNotEqual:
4428       return not_equal;
4429     case kSignedLessThan:
4430       return less;
4431     case kSignedGreaterThanOrEqual:
4432       return greater_equal;
4433     case kSignedLessThanOrEqual:
4434       return less_equal;
4435     case kSignedGreaterThan:
4436       return greater;
4437     case kUnsignedLessThan:
4438       return below;
4439     case kUnsignedGreaterThanOrEqual:
4440       return above_equal;
4441     case kUnsignedLessThanOrEqual:
4442       return below_equal;
4443     case kUnsignedGreaterThan:
4444       return above;
4445     case kOverflow:
4446       return overflow;
4447     case kNotOverflow:
4448       return no_overflow;
4449     default:
4450       break;
4451   }
4452   UNREACHABLE();
4453 }
4454 
4455 }  // namespace
4456 
4457 // Assembles branches after this instruction.
4458 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4459   Label::Distance flabel_distance =
4460       branch->fallthru ? Label::kNear : Label::kFar;
4461   Label* tlabel = branch->true_label;
4462   Label* flabel = branch->false_label;
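  // Floating-point compares leave the parity flag set iff the operands were
  // unordered (at least one NaN), so kUnorderedEqual must branch to the false
  // label and kUnorderedNotEqual to the true label before the main condition
  // is tested.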
4463   if (branch->condition == kUnorderedEqual) {
4464     __ j(parity_even, flabel, flabel_distance);
4465   } else if (branch->condition == kUnorderedNotEqual) {
4466     __ j(parity_even, tlabel);
4467   }
4468   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4469 
4470   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4471 }
4472 
4473 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4474                                             BranchInfo* branch) {
4475   Label::Distance flabel_distance =
4476       branch->fallthru ? Label::kNear : Label::kFar;
4477   Label* tlabel = branch->true_label;
4478   Label* flabel = branch->false_label;
4479   Label nodeopt;
4480   if (branch->condition == kUnorderedEqual) {
4481     __ j(parity_even, flabel, flabel_distance);
4482   } else if (branch->condition == kUnorderedNotEqual) {
4483     __ j(parity_even, tlabel);
4484   }
4485   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4486 
4487   if (FLAG_deopt_every_n_times > 0) {
4488     ExternalReference counter =
4489         ExternalReference::stress_deopt_count(isolate());
4490 
4491     __ pushfq();
4492     __ pushq(rax);
4493     __ load_rax(counter);
4494     __ decl(rax);
4495     __ j(not_zero, &nodeopt, Label::kNear);
4496 
4497     __ Move(rax, FLAG_deopt_every_n_times);
4498     __ store_rax(counter);
4499     __ popq(rax);
4500     __ popfq();
4501     __ jmp(tlabel);
4502 
4503     __ bind(&nodeopt);
4504     __ store_rax(counter);
4505     __ popq(rax);
4506     __ popfq();
4507   }
4508 
4509   if (!branch->fallthru) {
4510     __ jmp(flabel, flabel_distance);
4511   }
4512 }
4513 
4514 void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
4515     RpoNumber target) {
4516   __ jmp(GetLabel(target));
4517 }
4518 
4519 #if V8_ENABLE_WEBASSEMBLY
4520 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4521                                      FlagsCondition condition) {
4522   auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4523   Label* tlabel = ool->entry();
4524   Label end;
4525   if (condition == kUnorderedEqual) {
4526     __ j(parity_even, &end, Label::kNear);
4527   } else if (condition == kUnorderedNotEqual) {
4528     __ j(parity_even, tlabel);
4529   }
4530   __ j(FlagsConditionToCondition(condition), tlabel);
4531   __ bind(&end);
4532 }
4533 #endif  // V8_ENABLE_WEBASSEMBLY
4534 
4535 // Assembles boolean materializations after this instruction.
4536 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4537                                         FlagsCondition condition) {
4538   X64OperandConverter i(this, instr);
4539   Label done;
4540 
4541   // Materialize a full 64-bit 1 or 0 value. The result register is always the
4542   // last output of the instruction.
4543   Label check;
4544   DCHECK_NE(0u, instr->OutputCount());
4545   Register reg = i.OutputRegister(instr->OutputCount() - 1);
4546   if (condition == kUnorderedEqual) {
4547     __ j(parity_odd, &check, Label::kNear);
4548     __ Move(reg, 0);
4549     __ jmp(&done, Label::kNear);
4550   } else if (condition == kUnorderedNotEqual) {
4551     __ j(parity_odd, &check, Label::kNear);
4552     __ Move(reg, 1);
4553     __ jmp(&done, Label::kNear);
4554   }
4555   __ bind(&check);
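  // setcc only writes the low byte of the register, so unless the register
  // was already cleared before this instruction, zero-extend it to get a
  // clean 64-bit 0/1 value.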
4556   __ setcc(FlagsConditionToCondition(condition), reg);
4557   if (!ShouldClearOutputRegisterBeforeInstruction(this, instr)) {
4558     __ movzxbl(reg, reg);
4559   }
4560   __ bind(&done);
4561 }
4562 
4563 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4564   X64OperandConverter i(this, instr);
4565   Register input = i.InputRegister(0);
4566   std::vector<std::pair<int32_t, Label*>> cases;
4567   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4568     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4569   }
4570   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4571                                       cases.data() + cases.size());
4572 }
4573 
4574 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4575   X64OperandConverter i(this, instr);
4576   Register input = i.InputRegister(0);
4577   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4578   Label** cases = zone()->NewArray<Label*>(case_count);
4579   for (int32_t index = 0; index < case_count; ++index) {
4580     cases[index] = GetLabel(i.InputRpo(index + 2));
4581   }
4582   Label* const table = AddJumpTable(cases, case_count);
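  // The table built by AssembleJumpTable (below) holds one 8-byte label
  // address per case, hence the times_8 scaling in the indirect jump;
  // out-of-range inputs branch to the default target at InputRpo(1) first.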
4583   __ cmpl(input, Immediate(case_count));
4584   __ j(above_equal, GetLabel(i.InputRpo(1)));
4585   __ leaq(kScratchRegister, Operand(table));
4586   __ jmp(Operand(kScratchRegister, input, times_8, 0));
4587 }
4588 
4589 void CodeGenerator::AssembleArchSelect(Instruction* instr,
4590                                        FlagsCondition condition) {
4591   X64OperandConverter i(this, instr);
4592   MachineRepresentation rep =
4593       LocationOperand::cast(instr->OutputAt(0))->representation();
4594   Condition cc = FlagsConditionToCondition(condition);
4595   DCHECK_EQ(i.OutputRegister(), i.InputRegister(instr->InputCount() - 2));
4596   size_t last_input = instr->InputCount() - 1;
4597   // kUnorderedNotEqual can be implemented more efficiently than
4598   // kUnorderedEqual. As the OR of two flags, it can be done with just two
4599   // cmovs. If the condition was originally a kUnorderedEqual, expect the
4600   // instruction selector to have inverted it and swapped the input.
4601   DCHECK_NE(condition, kUnorderedEqual);
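  // For kUnorderedNotEqual the select is the OR of two conditions: the first
  // cmov covers not_equal, and the second cmov on parity_even covers the
  // unordered (NaN) case.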
4602   if (rep == MachineRepresentation::kWord32) {
4603     if (HasRegisterInput(instr, last_input)) {
4604       __ cmovl(cc, i.OutputRegister(), i.InputRegister(last_input));
4605       if (condition == kUnorderedNotEqual) {
4606         __ cmovl(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4607       }
4608     } else {
4609       __ cmovl(cc, i.OutputRegister(), i.InputOperand(last_input));
4610       if (condition == kUnorderedNotEqual) {
4611         __ cmovl(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4612       }
4613     }
4614   } else {
4615     DCHECK_EQ(rep, MachineRepresentation::kWord64);
4616     if (HasRegisterInput(instr, last_input)) {
4617       __ cmovq(cc, i.OutputRegister(), i.InputRegister(last_input));
4618       if (condition == kUnorderedNotEqual) {
4619         __ cmovq(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4620       }
4621     } else {
4622       __ cmovq(cc, i.OutputRegister(), i.InputOperand(last_input));
4623       if (condition == kUnorderedNotEqual) {
4624         __ cmovq(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4625       }
4626     }
4627   }
4628 }
4629 
4630 namespace {
4631 
4632 static const int kQuadWordSize = 16;
4633 
4634 }  // namespace
4635 
4636 void CodeGenerator::FinishFrame(Frame* frame) {
4637   CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4638 
4639   const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4640   if (!saves_fp.is_empty()) {  // Save callee-saved XMM registers.
4641     frame->AlignSavedCalleeRegisterSlots();
4642     const uint32_t saves_fp_count = saves_fp.Count();
4643     frame->AllocateSavedCalleeRegisterSlots(
4644         saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4645   }
4646   const RegList saves = call_descriptor->CalleeSavedRegisters();
4647   if (!saves.is_empty()) {  // Save callee-saved registers.
4648     frame->AllocateSavedCalleeRegisterSlots(saves.Count());
4649   }
4650 }
4651 
4652 void CodeGenerator::AssembleConstructFrame() {
4653   auto call_descriptor = linkage()->GetIncomingDescriptor();
4654   if (frame_access_state()->has_frame()) {
4655     int pc_base = __ pc_offset();
4656 
4657     if (call_descriptor->IsCFunctionCall()) {
4658       __ pushq(rbp);
4659       __ movq(rbp, rsp);
4660 #if V8_ENABLE_WEBASSEMBLY
4661       if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4662         __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4663         // Reserve stack space for saving the c_entry_fp later.
4664         __ AllocateStackSpace(kSystemPointerSize);
4665       }
4666 #endif  // V8_ENABLE_WEBASSEMBLY
4667     } else if (call_descriptor->IsJSFunctionCall()) {
4668       __ Prologue();
4669     } else {
4670       __ StubPrologue(info()->GetOutputStackFrameType());
4671 #if V8_ENABLE_WEBASSEMBLY
4672       if (call_descriptor->IsWasmFunctionCall() ||
4673           call_descriptor->IsWasmImportWrapper() ||
4674           call_descriptor->IsWasmCapiFunction()) {
4675         // We do not use this stack value in import wrappers and capi functions.
4676         // We push it anyway to satisfy legacy assumptions about these frames'
4677         // size and order.
4678         // TODO(manoskouk): Consider fixing this.
4679         __ pushq(kWasmInstanceRegister);
4680       }
4681       if (call_descriptor->IsWasmCapiFunction()) {
4682         // Reserve space for saving the PC later.
4683         __ AllocateStackSpace(kSystemPointerSize);
4684       }
4685 #endif  // V8_ENABLE_WEBASSEMBLY
4686     }
4687 
4688     unwinding_info_writer_.MarkFrameConstructed(pc_base);
4689   }
4690   int required_slots =
4691       frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4692 
4693   if (info()->is_osr()) {
4694     // TurboFan OSR-compiled functions cannot be entered directly.
4695     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4696 
4697     // Unoptimized code jumps directly to this entrypoint while the unoptimized
4698     // frame is still on the stack. Optimized code uses OSR values directly from
4699     // the unoptimized frame. Thus, all that needs to be done is to allocate the
4700     // remaining stack slots.
4701     __ RecordComment("-- OSR entrypoint --");
4702     osr_pc_offset_ = __ pc_offset();
4703     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4704   }
4705 
4706   const RegList saves = call_descriptor->CalleeSavedRegisters();
4707   const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4708 
4709   if (required_slots > 0) {
4710     DCHECK(frame_access_state()->has_frame());
4711 #if V8_ENABLE_WEBASSEMBLY
4712     if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
4713       // For WebAssembly functions with big frames we have to do the stack
4714       // overflow check before we construct the frame. Otherwise we may not
4715       // have enough space on the stack to call the runtime for the stack
4716       // overflow.
4717       Label done;
4718 
4719       // If the frame is bigger than the stack, we throw the stack overflow
4720       // exception unconditionally. Thereby we can avoid the integer overflow
4721       // check in the condition code.
4722       if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
4723         __ movq(kScratchRegister,
4724                 FieldOperand(kWasmInstanceRegister,
4725                              WasmInstanceObject::kRealStackLimitAddressOffset));
4726         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4727         __ addq(kScratchRegister,
4728                 Immediate(required_slots * kSystemPointerSize));
4729         __ cmpq(rsp, kScratchRegister);
4730         __ j(above_equal, &done, Label::kNear);
4731       }
4732 
4733       __ near_call(wasm::WasmCode::kWasmStackOverflow,
4734                    RelocInfo::WASM_STUB_CALL);
4735       // The call does not return, hence we can ignore any references and just
4736       // define an empty safepoint.
4737       ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4738       RecordSafepoint(reference_map);
4739       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4740       __ bind(&done);
4741     }
4742 #endif  // V8_ENABLE_WEBASSEMBLY
4743 
4744     // Skip callee-saved and return slots, which are created below.
4745     required_slots -= saves.Count();
4746     required_slots -= saves_fp.Count() * (kQuadWordSize / kSystemPointerSize);
4747     required_slots -= frame()->GetReturnSlotCount();
4748     if (required_slots > 0) {
4749       __ AllocateStackSpace(required_slots * kSystemPointerSize);
4750     }
4751   }
4752 
4753   if (!saves_fp.is_empty()) {  // Save callee-saved XMM registers.
4754     const uint32_t saves_fp_count = saves_fp.Count();
4755     const int stack_size = saves_fp_count * kQuadWordSize;
4756     // Adjust the stack pointer.
4757     __ AllocateStackSpace(stack_size);
4758     // Store the registers on the stack.
4759     int slot_idx = 0;
4760     for (XMMRegister reg : saves_fp) {
4761       __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx), reg);
4762       slot_idx++;
4763     }
4764   }
4765 
4766   if (!saves.is_empty()) {  // Save callee-saved registers.
4767     for (Register reg : base::Reversed(saves)) {
4768       __ pushq(reg);
4769     }
4770   }
4771 
4772   // Allocate return slots (located after callee-saved).
4773   if (frame()->GetReturnSlotCount() > 0) {
4774     __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4775   }
4776 }
4777 
4778 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4779   auto call_descriptor = linkage()->GetIncomingDescriptor();
4780 
4781   // Restore registers.
4782   const RegList saves = call_descriptor->CalleeSavedRegisters();
4783   if (!saves.is_empty()) {
4784     const int returns = frame()->GetReturnSlotCount();
4785     if (returns != 0) {
4786       __ addq(rsp, Immediate(returns * kSystemPointerSize));
4787     }
4788     for (Register reg : saves) {
4789       __ popq(reg);
4790     }
4791   }
4792   const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4793   if (!saves_fp.is_empty()) {
4794     const uint32_t saves_fp_count = saves_fp.Count();
4795     const int stack_size = saves_fp_count * kQuadWordSize;
4796     // Load the registers from the stack.
4797     int slot_idx = 0;
4798     for (XMMRegister reg : saves_fp) {
4799       __ Movdqu(reg, Operand(rsp, kQuadWordSize * slot_idx));
4800       slot_idx++;
4801     }
4802     // Adjust the stack pointer.
4803     __ addq(rsp, Immediate(stack_size));
4804   }
4805 
4806   unwinding_info_writer_.MarkBlockWillExit();
4807 
4808   X64OperandConverter g(this, nullptr);
4809   int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
4810 
4811   // {additional_pop_count} is only greater than zero if {parameter_slots} == 0.
4812   // Check RawMachineAssembler::PopAndReturn.
4813   if (parameter_slots != 0) {
4814     if (additional_pop_count->IsImmediate()) {
4815       DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4816     } else if (FLAG_debug_code) {
4817       __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4818       __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4819     }
4820   }
4821 
4822   Register argc_reg = rcx;
4823   // Functions with JS linkage have at least one parameter (the receiver).
4824   // If {parameter_slots} == 0, it means it is a builtin with
4825   // kDontAdaptArgumentsSentinel, which takes care of popping the JS
4826   // arguments itself.
4827   const bool drop_jsargs = parameter_slots != 0 &&
4828                            frame_access_state()->has_frame() &&
4829                            call_descriptor->IsJSFunctionCall();
4830   if (call_descriptor->IsCFunctionCall()) {
4831     AssembleDeconstructFrame();
4832   } else if (frame_access_state()->has_frame()) {
4833     if (additional_pop_count->IsImmediate() &&
4834         g.ToConstant(additional_pop_count).ToInt32() == 0) {
4835       // Canonicalize JSFunction return sites for now.
4836       if (return_label_.is_bound()) {
4837         __ jmp(&return_label_);
4838         return;
4839       } else {
4840         __ bind(&return_label_);
4841       }
4842     }
4843     if (drop_jsargs) {
4844       // Get the actual argument count.
4845       DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4846       __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4847     }
4848     AssembleDeconstructFrame();
4849   }
4850 
4851   if (drop_jsargs) {
4852     // We must pop all arguments from the stack (including the receiver).
4853     // The number of arguments without the receiver is
4854     // max(argc_reg, parameter_slots-1), and the receiver is added in
4855     // DropArguments().
4856     Label mismatch_return;
4857     Register scratch_reg = r10;
4858     DCHECK_NE(argc_reg, scratch_reg);
4859     DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4860     DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4861     __ cmpq(argc_reg, Immediate(parameter_slots));
4862     __ j(greater, &mismatch_return, Label::kNear);
4863     __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
4864     __ bind(&mismatch_return);
4865     __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
4866                      TurboAssembler::kCountIncludesReceiver);
4867     // We use a return instead of a jump for better return address prediction.
4868     __ Ret();
4869   } else if (additional_pop_count->IsImmediate()) {
4870     Register scratch_reg = r10;
4871     DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4872     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4873     size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
4874     CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4875     __ Ret(static_cast<int>(pop_size), scratch_reg);
4876   } else {
4877     Register pop_reg = g.ToRegister(additional_pop_count);
4878     Register scratch_reg = pop_reg == r10 ? rcx : r10;
4879     DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4880     DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
4881     int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
4882     __ PopReturnAddressTo(scratch_reg);
4883     __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4884                          static_cast<int>(pop_size)));
4885     __ PushReturnAddressFrom(scratch_reg);
4886     __ Ret();
4887   }
4888 }
4889 
4890 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4891 
4892 void CodeGenerator::PrepareForDeoptimizationExits(
4893     ZoneDeque<DeoptimizationExit*>* exits) {}
4894 
4895 void CodeGenerator::IncrementStackAccessCounter(
4896     InstructionOperand* source, InstructionOperand* destination) {
4897   DCHECK(FLAG_trace_turbo_stack_accesses);
4898   if (!info()->IsOptimizing()) {
4899 #if V8_ENABLE_WEBASSEMBLY
4900     if (!info()->IsWasm()) return;
4901 #else
4902     return;
4903 #endif  // V8_ENABLE_WEBASSEMBLY
4904   }
4905   DCHECK_NOT_NULL(debug_name_);
4906   auto IncrementCounter = [&](ExternalReference counter) {
4907     __ incl(__ ExternalReferenceAsOperand(counter));
4908   };
4909   if (source->IsAnyStackSlot()) {
4910     IncrementCounter(
4911         ExternalReference::address_of_load_from_stack_count(debug_name_));
4912   }
4913   if (destination->IsAnyStackSlot()) {
4914     IncrementCounter(
4915         ExternalReference::address_of_store_to_stack_count(debug_name_));
4916   }
4917 }
4918 
4919 namespace {
4920 
4921 bool Is32BitOperand(InstructionOperand* operand) {
4922   DCHECK(operand->IsStackSlot() || operand->IsRegister());
4923   MachineRepresentation mr = LocationOperand::cast(operand)->representation();
4924   return mr == MachineRepresentation::kWord32 ||
4925          mr == MachineRepresentation::kCompressed ||
4926          mr == MachineRepresentation::kCompressedPointer;
4927 }
4928 
4929 }  // namespace
4930 
4931 void CodeGenerator::AssembleMove(InstructionOperand* source,
4932                                  InstructionOperand* destination) {
4933   X64OperandConverter g(this, nullptr);
4934   // Helper function to write the given constant to the dst register.
4935   auto MoveConstantToRegister = [&](Register dst, Constant src) {
4936     switch (src.type()) {
4937       case Constant::kInt32: {
4938         if (RelocInfo::IsWasmReference(src.rmode())) {
4939           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4940         } else {
4941           int32_t value = src.ToInt32();
4942           if (value == 0) {
4943             __ xorl(dst, dst);
4944           } else {
4945             __ movl(dst, Immediate(value));
4946           }
4947         }
4948         break;
4949       }
4950       case Constant::kInt64:
4951         if (RelocInfo::IsWasmReference(src.rmode())) {
4952           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4953         } else {
4954           __ Move(dst, src.ToInt64());
4955         }
4956         break;
4957       case Constant::kFloat32:
4958         __ MoveNumber(dst, src.ToFloat32());
4959         break;
4960       case Constant::kFloat64:
4961         __ MoveNumber(dst, src.ToFloat64().value());
4962         break;
4963       case Constant::kExternalReference:
4964         __ Move(dst, src.ToExternalReference());
4965         break;
4966       case Constant::kHeapObject: {
4967         Handle<HeapObject> src_object = src.ToHeapObject();
4968         RootIndex index;
4969         if (IsMaterializableFromRoot(src_object, &index)) {
4970           __ LoadRoot(dst, index);
4971         } else {
4972           __ Move(dst, src_object);
4973         }
4974         break;
4975       }
4976       case Constant::kCompressedHeapObject: {
4977         Handle<HeapObject> src_object = src.ToHeapObject();
4978         RootIndex index;
4979         if (IsMaterializableFromRoot(src_object, &index)) {
4980           __ LoadRoot(dst, index);
4981         } else {
4982           __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4983         }
4984         break;
4985       }
4986       case Constant::kDelayedStringConstant: {
4987         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4988         __ MoveStringConstant(dst, src_constant);
4989         break;
4990       }
4991       case Constant::kRpoNumber:
4992         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
4993     }
4994   };
4995   // Helper function to write the given constant to the stack.
4996   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4997     if (!RelocInfo::IsWasmReference(src.rmode())) {
4998       switch (src.type()) {
4999         case Constant::kInt32:
5000           __ Move(dst, src.ToInt32());
5001           return;
5002         case Constant::kInt64:
5003           __ Move(dst, src.ToInt64());
5004           return;
5005         default:
5006           break;
5007       }
5008     }
5009     MoveConstantToRegister(kScratchRegister, src);
5010     __ movq(dst, kScratchRegister);
5011   };
5012 
5013   if (FLAG_trace_turbo_stack_accesses) {
5014     IncrementStackAccessCounter(source, destination);
5015   }
5016 
5017   // Dispatch on the source and destination operand kinds.
5018   switch (MoveType::InferMove(source, destination)) {
5019     case MoveType::kRegisterToRegister:
5020       if (source->IsRegister()) {
5021         __ movq(g.ToRegister(destination), g.ToRegister(source));
5022       } else {
5023         DCHECK(source->IsFPRegister());
5024         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
5025       }
5026       return;
5027     case MoveType::kRegisterToStack: {
5028       Operand dst = g.ToOperand(destination);
5029       if (source->IsRegister()) {
5030         __ movq(dst, g.ToRegister(source));
5031       } else {
5032         DCHECK(source->IsFPRegister());
5033         XMMRegister src = g.ToDoubleRegister(source);
5034         MachineRepresentation rep =
5035             LocationOperand::cast(source)->representation();
5036         if (rep != MachineRepresentation::kSimd128) {
5037           __ Movsd(dst, src);
5038         } else {
5039           __ Movups(dst, src);
5040         }
5041       }
5042       return;
5043     }
5044     case MoveType::kStackToRegister: {
5045       Operand src = g.ToOperand(source);
5046       if (source->IsStackSlot()) {
5047         // TODO(13581): Fix this for other code kinds (see
5048         // https://crbug.com/1356461).
5049         if (code_kind() == CodeKind::WASM_FUNCTION && Is32BitOperand(source) &&
5050             Is32BitOperand(destination)) {
5051           // When we need only 32 bits, move only 32 bits. Benefits:
5052           // - Save a byte here and there (depending on the destination
5053           //   register; "movl eax, ..." is smaller than "movq rax, ...").
5054           // - Safeguard against accidental decompression of compressed slots.
5055           // We must check both {source} and {destination} to be 32-bit values,
5056           // because treating 32-bit sources as 64-bit values can be perfectly
5057           // fine as a result of virtual register renaming (to avoid redundant
5058           // explicit zero-extensions that also happen implicitly).
5059           __ movl(g.ToRegister(destination), src);
5060         } else {
5061           __ movq(g.ToRegister(destination), src);
5062         }
5063       } else {
5064         DCHECK(source->IsFPStackSlot());
5065         XMMRegister dst = g.ToDoubleRegister(destination);
5066         MachineRepresentation rep =
5067             LocationOperand::cast(source)->representation();
5068         if (rep != MachineRepresentation::kSimd128) {
5069           __ Movsd(dst, src);
5070         } else {
5071           __ Movups(dst, src);
5072         }
5073       }
5074       return;
5075     }
5076     case MoveType::kStackToStack: {
5077       Operand src = g.ToOperand(source);
5078       Operand dst = g.ToOperand(destination);
5079       if (source->IsStackSlot()) {
5080         // Spill on demand to use a temporary register for memory-to-memory
5081         // moves.
5082         __ movq(kScratchRegister, src);
5083         __ movq(dst, kScratchRegister);
5084       } else {
5085         MachineRepresentation rep =
5086             LocationOperand::cast(source)->representation();
5087         if (rep != MachineRepresentation::kSimd128) {
5088           __ Movsd(kScratchDoubleReg, src);
5089           __ Movsd(dst, kScratchDoubleReg);
5090         } else {
5091           DCHECK(source->IsSimd128StackSlot());
5092           __ Movups(kScratchDoubleReg, src);
5093           __ Movups(dst, kScratchDoubleReg);
5094         }
5095       }
5096       return;
5097     }
5098     case MoveType::kConstantToRegister: {
5099       Constant src = g.ToConstant(source);
5100       if (destination->IsRegister()) {
5101         MoveConstantToRegister(g.ToRegister(destination), src);
5102       } else {
5103         DCHECK(destination->IsFPRegister());
5104         XMMRegister dst = g.ToDoubleRegister(destination);
5105         if (src.type() == Constant::kFloat32) {
5106           // TODO(turbofan): Can we do better here?
5107           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
5108         } else {
5109           DCHECK_EQ(src.type(), Constant::kFloat64);
5110           __ Move(dst, src.ToFloat64().AsUint64());
5111         }
5112       }
5113       return;
5114     }
5115     case MoveType::kConstantToStack: {
5116       Constant src = g.ToConstant(source);
5117       Operand dst = g.ToOperand(destination);
5118       if (destination->IsStackSlot()) {
5119         MoveConstantToSlot(dst, src);
5120       } else {
5121         DCHECK(destination->IsFPStackSlot());
5122         if (src.type() == Constant::kFloat32) {
5123           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
5124         } else {
5125           DCHECK_EQ(src.type(), Constant::kFloat64);
5126           __ Move(dst, src.ToFloat64().AsUint64());
5127         }
5128       }
5129       return;
5130     }
5131   }
5132   UNREACHABLE();
5133 }
5134 
5135 void CodeGenerator::AssembleSwap(InstructionOperand* source,
5136                                  InstructionOperand* destination) {
5137   if (FLAG_trace_turbo_stack_accesses) {
5138     IncrementStackAccessCounter(source, destination);
5139     IncrementStackAccessCounter(destination, source);
5140   }
5141 
5142   X64OperandConverter g(this, nullptr);
5143   // Dispatch on the source and destination operand kinds.  Not all
5144   // combinations are possible.
5145   switch (MoveType::InferSwap(source, destination)) {
5146     case MoveType::kRegisterToRegister: {
5147       if (source->IsRegister()) {
5148         Register src = g.ToRegister(source);
5149         Register dst = g.ToRegister(destination);
5150         __ movq(kScratchRegister, src);
5151         __ movq(src, dst);
5152         __ movq(dst, kScratchRegister);
5153       } else {
5154         DCHECK(source->IsFPRegister());
5155         XMMRegister src = g.ToDoubleRegister(source);
5156         XMMRegister dst = g.ToDoubleRegister(destination);
5157         __ Movapd(kScratchDoubleReg, src);
5158         __ Movapd(src, dst);
5159         __ Movapd(dst, kScratchDoubleReg);
5160       }
5161       return;
5162     }
5163     case MoveType::kRegisterToStack: {
5164       if (source->IsRegister()) {
5165         Register src = g.ToRegister(source);
5166         Operand dst = g.ToOperand(destination);
5167         __ movq(kScratchRegister, src);
5168         __ movq(src, dst);
5169         __ movq(dst, kScratchRegister);
5170       } else {
5171         DCHECK(source->IsFPRegister());
5172         XMMRegister src = g.ToDoubleRegister(source);
5173         Operand dst = g.ToOperand(destination);
5174         MachineRepresentation rep =
5175             LocationOperand::cast(source)->representation();
5176         if (rep != MachineRepresentation::kSimd128) {
5177           __ Movsd(kScratchDoubleReg, src);
5178           __ Movsd(src, dst);
5179           __ Movsd(dst, kScratchDoubleReg);
5180         } else {
5181           __ Movups(kScratchDoubleReg, src);
5182           __ Movups(src, dst);
5183           __ Movups(dst, kScratchDoubleReg);
5184         }
5185       }
5186       return;
5187     }
5188     case MoveType::kStackToStack: {
5189       Operand src = g.ToOperand(source);
5190       Operand dst = g.ToOperand(destination);
5191       MachineRepresentation rep =
5192           LocationOperand::cast(source)->representation();
5193       if (rep != MachineRepresentation::kSimd128) {
5194         Register tmp = kScratchRegister;
5195         __ movq(tmp, dst);
5196         __ pushq(src);  // Then use stack to copy src to destination.
5197         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5198                                                          kSystemPointerSize);
5199         __ popq(dst);
5200         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5201                                                          -kSystemPointerSize);
5202         __ movq(src, tmp);
5203       } else {
5204         // Without AVX, misaligned reads and writes will trap. Move using the
5205         // stack, in two parts.
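        // kScratchDoubleReg holds the old destination while the two push/pop
        // pairs copy the 16-byte source in two 8-byte halves (offsets 0 and
        // kSystemPointerSize); the saved value is then written back to src.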
5206         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
5207         __ pushq(src);  // Then use stack to copy src to destination.
5208         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5209                                                          kSystemPointerSize);
5210         __ popq(dst);
5211         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5212                                                          -kSystemPointerSize);
5213         __ pushq(g.ToOperand(source, kSystemPointerSize));
5214         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5215                                                          kSystemPointerSize);
5216         __ popq(g.ToOperand(destination, kSystemPointerSize));
5217         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5218                                                          -kSystemPointerSize);
5219         __ movups(src, kScratchDoubleReg);
5220       }
5221       return;
5222     }
5223     default:
5224       UNREACHABLE();
5225   }
5226 }
5227 
5228 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
5229   for (size_t index = 0; index < target_count; ++index) {
5230     __ dq(targets[index]);
5231   }
5232 }
5233 
5234 #undef __
5235 
5236 }  // namespace compiler
5237 }  // namespace internal
5238 }  // namespace v8
5239