• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/base/overflowing-math.h"
6 #include "src/codegen/assembler-inl.h"
7 #include "src/codegen/callable.h"
8 #include "src/codegen/cpu-features.h"
9 #include "src/codegen/ia32/assembler-ia32.h"
10 #include "src/codegen/ia32/register-ia32.h"
11 #include "src/codegen/macro-assembler.h"
12 #include "src/codegen/optimized-compilation-info.h"
13 #include "src/compiler/backend/code-generator-impl.h"
14 #include "src/compiler/backend/code-generator.h"
15 #include "src/compiler/backend/gap-resolver.h"
16 #include "src/compiler/node-matchers.h"
17 #include "src/compiler/osr.h"
18 #include "src/execution/frame-constants.h"
19 #include "src/execution/frames.h"
20 #include "src/heap/memory-chunk.h"
21 #include "src/objects/smi.h"
22 
23 #if V8_ENABLE_WEBASSEMBLY
24 #include "src/wasm/wasm-code-manager.h"
25 #include "src/wasm/wasm-objects.h"
26 #endif  // V8_ENABLE_WEBASSEMBLY
27 
28 namespace v8 {
29 namespace internal {
30 namespace compiler {
31 
32 #define __ tasm()->
33 
34 #define kScratchDoubleReg xmm0
35 
36 // Adds IA-32 specific methods for decoding operands.
37 class IA32OperandConverter : public InstructionOperandConverter {
38  public:
IA32OperandConverter(CodeGenerator * gen,Instruction * instr)39   IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
40       : InstructionOperandConverter(gen, instr) {}
41 
InputOperand(size_t index,int extra=0)42   Operand InputOperand(size_t index, int extra = 0) {
43     return ToOperand(instr_->InputAt(index), extra);
44   }
45 
InputImmediate(size_t index)46   Immediate InputImmediate(size_t index) {
47     return ToImmediate(instr_->InputAt(index));
48   }
49 
OutputOperand()50   Operand OutputOperand() { return ToOperand(instr_->Output()); }
51 
ToOperand(InstructionOperand * op,int extra=0)52   Operand ToOperand(InstructionOperand* op, int extra = 0) {
53     if (op->IsRegister()) {
54       DCHECK_EQ(0, extra);
55       return Operand(ToRegister(op));
56     } else if (op->IsFPRegister()) {
57       DCHECK_EQ(0, extra);
58       return Operand(ToDoubleRegister(op));
59     }
60     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
61     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
62   }
63 
SlotToOperand(int slot,int extra=0)64   Operand SlotToOperand(int slot, int extra = 0) {
65     FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
66     return Operand(offset.from_stack_pointer() ? esp : ebp,
67                    offset.offset() + extra);
68   }
69 
ToImmediate(InstructionOperand * operand)70   Immediate ToImmediate(InstructionOperand* operand) {
71     Constant constant = ToConstant(operand);
72 #if V8_ENABLE_WEBASSEMBLY
73     if (constant.type() == Constant::kInt32 &&
74         RelocInfo::IsWasmReference(constant.rmode())) {
75       return Immediate(static_cast<Address>(constant.ToInt32()),
76                        constant.rmode());
77     }
78 #endif  // V8_ENABLE_WEBASSEMBLY
79     switch (constant.type()) {
80       case Constant::kInt32:
81         return Immediate(constant.ToInt32());
82       case Constant::kFloat32:
83         return Immediate::EmbeddedNumber(constant.ToFloat32());
84       case Constant::kFloat64:
85         return Immediate::EmbeddedNumber(constant.ToFloat64().value());
86       case Constant::kExternalReference:
87         return Immediate(constant.ToExternalReference());
88       case Constant::kHeapObject:
89         return Immediate(constant.ToHeapObject());
90       case Constant::kCompressedHeapObject:
91         break;
92       case Constant::kDelayedStringConstant:
93         return Immediate::EmbeddedStringConstant(
94             constant.ToDelayedStringConstant());
95       case Constant::kInt64:
96         break;
97       case Constant::kRpoNumber:
98         return Immediate::CodeRelativeOffset(ToLabel(operand));
99     }
100     UNREACHABLE();
101   }
102 
NextOffset(size_t * offset)103   static size_t NextOffset(size_t* offset) {
104     size_t i = *offset;
105     (*offset)++;
106     return i;
107   }
108 
ScaleFor(AddressingMode one,AddressingMode mode)109   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
110     STATIC_ASSERT(0 == static_cast<int>(times_1));
111     STATIC_ASSERT(1 == static_cast<int>(times_2));
112     STATIC_ASSERT(2 == static_cast<int>(times_4));
113     STATIC_ASSERT(3 == static_cast<int>(times_8));
114     int scale = static_cast<int>(mode - one);
115     DCHECK(scale >= 0 && scale < 4);
116     return static_cast<ScaleFactor>(scale);
117   }
118 
MemoryOperand(size_t * offset)119   Operand MemoryOperand(size_t* offset) {
120     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
121     switch (mode) {
122       case kMode_MR: {
123         Register base = InputRegister(NextOffset(offset));
124         int32_t disp = 0;
125         return Operand(base, disp);
126       }
127       case kMode_MRI: {
128         Register base = InputRegister(NextOffset(offset));
129         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
130         return Operand(base, ctant.ToInt32(), ctant.rmode());
131       }
132       case kMode_MR1:
133       case kMode_MR2:
134       case kMode_MR4:
135       case kMode_MR8: {
136         Register base = InputRegister(NextOffset(offset));
137         Register index = InputRegister(NextOffset(offset));
138         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
139         int32_t disp = 0;
140         return Operand(base, index, scale, disp);
141       }
142       case kMode_MR1I:
143       case kMode_MR2I:
144       case kMode_MR4I:
145       case kMode_MR8I: {
146         Register base = InputRegister(NextOffset(offset));
147         Register index = InputRegister(NextOffset(offset));
148         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
149         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
150         return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
151       }
152       case kMode_M1:
153       case kMode_M2:
154       case kMode_M4:
155       case kMode_M8: {
156         Register index = InputRegister(NextOffset(offset));
157         ScaleFactor scale = ScaleFor(kMode_M1, mode);
158         int32_t disp = 0;
159         return Operand(index, scale, disp);
160       }
161       case kMode_M1I:
162       case kMode_M2I:
163       case kMode_M4I:
164       case kMode_M8I: {
165         Register index = InputRegister(NextOffset(offset));
166         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
167         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
168         return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
169       }
170       case kMode_MI: {
171         Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
172         return Operand(ctant.ToInt32(), ctant.rmode());
173       }
174       case kMode_Root: {
175         Register base = kRootRegister;
176         int32_t disp = InputInt32(NextOffset(offset));
177         return Operand(base, disp);
178       }
179       case kMode_None:
180         UNREACHABLE();
181     }
182     UNREACHABLE();
183   }
184 
MemoryOperand(size_t first_input=0)185   Operand MemoryOperand(size_t first_input = 0) {
186     return MemoryOperand(&first_input);
187   }
188 
NextMemoryOperand(size_t offset=0)189   Operand NextMemoryOperand(size_t offset = 0) {
190     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
191     Register base = InputRegister(NextOffset(&offset));
192     const int32_t disp = 4;
193     if (mode == kMode_MR1) {
194       Register index = InputRegister(NextOffset(&offset));
195       ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
196       return Operand(base, index, scale, disp);
197     } else if (mode == kMode_MRI) {
198       Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
199       return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
200     } else {
201       UNREACHABLE();
202     }
203   }
204 
MoveInstructionOperandToRegister(Register destination,InstructionOperand * op)205   void MoveInstructionOperandToRegister(Register destination,
206                                         InstructionOperand* op) {
207     if (op->IsImmediate() || op->IsConstant()) {
208       gen_->tasm()->mov(destination, ToImmediate(op));
209     } else if (op->IsRegister()) {
210       gen_->tasm()->Move(destination, ToRegister(op));
211     } else {
212       gen_->tasm()->mov(destination, ToOperand(op));
213     }
214   }
215 };
216 
217 namespace {
218 
HasAddressingMode(Instruction * instr)219 bool HasAddressingMode(Instruction* instr) {
220   return instr->addressing_mode() != kMode_None;
221 }
222 
HasImmediateInput(Instruction * instr,size_t index)223 bool HasImmediateInput(Instruction* instr, size_t index) {
224   return instr->InputAt(index)->IsImmediate();
225 }
226 
HasRegisterInput(Instruction * instr,size_t index)227 bool HasRegisterInput(Instruction* instr, size_t index) {
228   return instr->InputAt(index)->IsRegister();
229 }
230 
// Out-of-line code that materializes a float32 NaN in |result| by
// computing 0.0f / 0.0f.
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 yields NaN.
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};
244 
// Out-of-line code that materializes a float64 NaN in |result| by
// computing 0.0 / 0.0.
class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 yields NaN.
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};
258 
// Out-of-line slow path for double -> int32 truncation: spills the input
// double to a fresh stack slot and calls the DoubleToI stub/builtin, which
// writes the truncated result back into that slot.
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        isolate_(gen->isolate()),
        zone_(gen->zone()) {
  }

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    __ Movsd(MemOperand(esp, 0), input_);
#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
#else
    // For balance: keeps the brace/else structure below well-formed when
    // the wasm branch above is compiled out.
    if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (tasm()->options().inline_offheap_trampolines) {
      __ CallBuiltin(Builtin::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    // The stub stored the result in the slot the input occupied.
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Isolate* isolate_;
  Zone* zone_;
};
304 
// Out-of-line write-barrier slow path. Skips the barrier entirely when the
// stored value's page does not need tracking; otherwise computes the slot
// address and calls the appropriate record-write stub (ephemeron-key, wasm
// runtime stub, or the regular builtin).
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        zone_(gen->zone()) {
    DCHECK(!AreAliased(object, scratch0, scratch1));
    DCHECK(!AreAliased(value, scratch0, scratch1));
  }

  void Generate() final {
    // Fast-out: nothing to record if the value's page is not interesting.
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    // Compute the address of the written slot.
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ||
                FLAG_use_full_record_write_builtin
            ? RememberedSetAction::kEmit
            : RememberedSetAction::kOmit;
    SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
#if V8_ENABLE_WEBASSEMBLY
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode,
                                          StubCallMode::kCallWasmRuntimeStub);
#endif  // V8_ENABLE_WEBASSEMBLY
    } else {
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Zone* zone_;
};
367 
368 }  // namespace
369 
// Emits a compare/test-style instruction (asm_instr), picking operand forms:
// memory + immediate/register when an addressing mode is encoded, otherwise
// register/operand + immediate/register/operand.
#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (0)
396 
// Calls the C implementation of the binary ieee754 function <name>, passing
// the two doubles on the stack; moves the x87 st(0) result to the output
// XMM register via a temporary stack slot.
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    /* Pass two doubles as arguments on the stack. */                    \
    __ PrepareCallCFunction(4, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ AllocateStackSpace(kDoubleSize);                                  \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
411 
// Calls the C implementation of the unary ieee754 function <name>, passing
// the double on the stack; moves the x87 st(0) result to the output XMM
// register via a temporary stack slot.
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    /* Pass one double as argument on the stack. */                      \
    __ PrepareCallCFunction(2, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ AllocateStackSpace(kDoubleSize);                                  \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
425 
// Emits a two-operand ALU instruction: register + memory when an addressing
// mode is encoded, otherwise operand + immediate or register + operand.
#define ASSEMBLE_BINOP(asm_instr)                             \
  do {                                                        \
    if (HasAddressingMode(instr)) {                           \
      size_t index = 1;                                       \
      Operand right = i.MemoryOperand(&index);                \
      __ asm_instr(i.InputRegister(0), right);                \
    } else {                                                  \
      if (HasImmediateInput(instr, 1)) {                      \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
      } else {                                                \
        __ asm_instr(i.InputRegister(0), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (0)
440 
// Emits an atomic read-modify-write loop: load the old value into eax,
// apply bin_inst into a temp register, then lock cmpxchg; retry until no
// concurrent writer changed the memory in between.
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(eax, i.MemoryOperand(1));                       \
    __ Move(i.TempRegister(0), eax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)
452 
// Emits a 64-bit atomic read-modify-write loop built on lock cmpxchg8b:
// loads the old pair into edx:eax, applies instr1/instr2 to the low/high
// halves (ebx and InputRegister(1)), and retries until the compare-exchange
// succeeds. ebx and the high-half input are preserved via push/pop, with
// the SP delta kept in sync for frame accesses.
// NOTE: no semicolon after `while (false)` — the previous trailing `;`
// made every use expand to an extra empty statement, which breaks the
// macro when used as the body of a brace-less if/else.
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)                \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov(eax, i.MemoryOperand(2));                            \
    __ mov(edx, i.NextMemoryOperand(2));                        \
    __ push(ebx);                                               \
    frame_access_state()->IncreaseSPDelta(1);                   \
    i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
    __ push(i.InputRegister(1));                                \
    __ instr1(ebx, eax);                                        \
    __ instr2(i.InputRegister(1), edx);                         \
    __ lock();                                                  \
    __ cmpxchg8b(i.MemoryOperand(2));                           \
    __ pop(i.InputRegister(1));                                 \
    __ pop(ebx);                                                \
    frame_access_state()->IncreaseSPDelta(-1);                  \
    __ j(not_equal, &binop);                                    \
  } while (false)
472 
// Emits an extending move (movsx/movzx family) into the output register
// from memory, a register, or a generic operand input.
#define ASSEMBLE_MOVX(mov_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (0)
483 
// Emits a punpck-style shuffle: three-operand form under AVX, destructive
// two-operand SSE form otherwise (dst must equal src0). With a single
// input, the same operand is used for both sources.
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
  do {                                                               \
    XMMRegister src0 = i.InputSimd128Register(0);                    \
    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    if (CpuFeatures::IsSupported(AVX)) {                             \
      CpuFeatureScope avx_scope(tasm(), AVX);                        \
      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
    } else {                                                         \
      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
      __ opcode(i.OutputSimd128Register(), src1);                    \
    }                                                                \
  } while (false)
496 
// Emits a shuffle that takes an immediate selector: three-operand form
// under AVX, destructive SSE form otherwise (dst must equal src0).
// Wrapped in do/while(false) so the macro expands to a single statement —
// the previous bare if/else was vulnerable to the dangling-else problem
// when used as the body of a brace-less if/else.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                 \
  do {                                                                   \
    if (CpuFeatures::IsSupported(AVX)) {                                 \
      CpuFeatureScope avx_scope(tasm(), AVX);                            \
      __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                   i.InputOperand(1), imm);                              \
    } else {                                                             \
      CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
      __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
    }                                                                    \
  } while (false)
507 
// Sets the output register to 1 iff every lane of the input is non-zero:
// |opcode| (a pcmpeq variant) compares a zeroed temp against the source, so
// the temp ends up non-zero only if some lane was zero; ptest then sets ZF
// when no lane was zero, and cmov materializes the boolean.
#define ASSEMBLE_SIMD_ALL_TRUE(opcode)               \
  do {                                               \
    Register dst = i.OutputRegister();               \
    Operand src = i.InputOperand(0);                 \
    Register tmp = i.TempRegister(0);                \
    XMMRegister tmp_simd = i.TempSimd128Register(1); \
    __ mov(tmp, Immediate(1));                       \
    __ xor_(dst, dst);                               \
    __ Pxor(tmp_simd, tmp_simd);                     \
    __ opcode(tmp_simd, src);                        \
    __ Ptest(tmp_simd, tmp_simd);                    \
    __ cmov(zero, dst, tmp);                         \
  } while (false)
521 
// Emits a per-lane SIMD shift. Immediate amounts are encoded directly;
// register amounts are masked to the lane width (mod 2^width) and moved
// into an XMM temp, as required by the SSE/AVX shift instructions.
#define ASSEMBLE_SIMD_SHIFT(opcode, width)             \
  do {                                                 \
    XMMRegister dst = i.OutputSimd128Register();       \
    DCHECK_EQ(dst, i.InputSimd128Register(0));         \
    if (HasImmediateInput(instr, 1)) {                 \
      __ opcode(dst, dst, byte{i.InputInt##width(1)}); \
    } else {                                           \
      XMMRegister tmp = i.TempSimd128Register(0);      \
      Register tmp_shift = i.TempRegister(1);          \
      constexpr int mask = (1 << width) - 1;           \
      __ mov(tmp_shift, i.InputRegister(1));           \
      __ and_(tmp_shift, Immediate(mask));             \
      __ Movd(tmp, tmp_shift);                         \
      __ opcode(dst, dst, tmp);                        \
    }                                                  \
  } while (false)
538 
// Emits a pinsr-style lane insert (lane index from input 1, value from
// memory or a general operand at input 2): three-operand form under AVX,
// destructive SSE form gated on CPU_FEATURE otherwise (dst must equal src).
#define ASSEMBLE_SIMD_PINSR(OPCODE, CPU_FEATURE)             \
  do {                                                       \
    XMMRegister dst = i.OutputSimd128Register();             \
    XMMRegister src = i.InputSimd128Register(0);             \
    int8_t laneidx = i.InputInt8(1);                         \
    if (HasAddressingMode(instr)) {                          \
      if (CpuFeatures::IsSupported(AVX)) {                   \
        CpuFeatureScope avx_scope(tasm(), AVX);              \
        __ v##OPCODE(dst, src, i.MemoryOperand(2), laneidx); \
      } else {                                               \
        DCHECK_EQ(dst, src);                                 \
        CpuFeatureScope sse_scope(tasm(), CPU_FEATURE);      \
        __ OPCODE(dst, i.MemoryOperand(2), laneidx);         \
      }                                                      \
    } else {                                                 \
      if (CpuFeatures::IsSupported(AVX)) {                   \
        CpuFeatureScope avx_scope(tasm(), AVX);              \
        __ v##OPCODE(dst, src, i.InputOperand(2), laneidx);  \
      } else {                                               \
        DCHECK_EQ(dst, src);                                 \
        CpuFeatureScope sse_scope(tasm(), CPU_FEATURE);      \
        __ OPCODE(dst, i.InputOperand(2), laneidx);          \
      }                                                      \
    }                                                        \
  } while (false)
564 
565 
// Tears down the current stack frame: restores esp from the frame pointer
// and pops the caller's saved ebp.
void CodeGenerator::AssembleDeconstructFrame() {
  __ mov(esp, ebp);
  __ pop(ebp);
}
570 
// Prepares a tail call: if this code set up a frame, reload the caller's
// frame pointer from the current frame, then switch frame accesses to be
// esp-relative since the current frame is being abandoned.
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ mov(ebp, MemOperand(ebp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}
577 
578 namespace {
579 
// Grows (or, if |allow_shrinkage|, shrinks) the stack so that exactly
// |new_slot_above_sp| slots lie above esp, keeping the frame access
// state's SP delta in sync with the emitted code.
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}
595 
596 #ifdef DEBUG
VerifyOutputOfAtomicPairInstr(IA32OperandConverter * converter,const Instruction * instr)597 bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
598                                    const Instruction* instr) {
599   if (instr->OutputCount() == 2) {
600     return (converter->OutputRegister(0) == eax &&
601             converter->OutputRegister(1) == edx);
602   }
603   if (instr->OutputCount() == 1) {
604     return (converter->OutputRegister(0) == eax &&
605             converter->TempRegister(0) == edx) ||
606            (converter->OutputRegister(0) == edx &&
607             converter->TempRegister(0) == eax);
608   }
609   DCHECK_EQ(instr->OutputCount(), 0);
610   return (converter->TempRegister(0) == eax &&
611           converter->TempRegister(1) == edx);
612 }
613 #endif
614 
615 }  // namespace
616 
// Before a tail call's gap moves: converts a trailing run of push-compatible
// moves (ending exactly at |first_unused_slot_offset|) into actual pushes,
// then adjusts esp upward only (no shrink) for the callee's frame.
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_slot_offset) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_slot_offset)) {
    IA32OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Bring esp to just above this push's destination slot.
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
      } else {
        // Pushes of non-scalar data types is not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      // The move has been emitted as a push; drop it from the gap.
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset, false);
}
652 
// After a tail call's gap moves: lets the stack pointer shrink back to
// exactly |first_unused_slot_offset| slots above esp.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_slot_offset) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset);
}
658 
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ ComputeCodeStartAddress(eax);
  __ cmp(eax, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
  __ pop(eax);  // Restore eax.
}
667 
// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
//    1. read from memory the word that contains that bit, which can be found in
//       the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ push(eax);  // Push eax so we can use it as a scratch register.
  // Load the CodeDataContainer of the currently executing code object.
  __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ pop(eax);  // Restore eax.

  // Jump to the lazy-deopt builtin only when the bit was set.
  Label skip;
  __ j(zero, &skip, Label::kNear);
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET);
  __ bind(&skip);
}
689 
690 // Assembles an instruction after register allocation, producing machine code.
AssembleArchInstruction(Instruction * instr)691 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
692     Instruction* instr) {
693   IA32OperandConverter i(this, instr);
694   InstructionCode opcode = instr->opcode();
695   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
696   switch (arch_opcode) {
697     case kArchCallCodeObject: {
698       InstructionOperand* op = instr->InputAt(0);
699       if (op->IsImmediate()) {
700         Handle<Code> code = i.InputCode(0);
701         __ Call(code, RelocInfo::CODE_TARGET);
702       } else {
703         Register reg = i.InputRegister(0);
704         DCHECK_IMPLIES(
705             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
706             reg == kJavaScriptCallCodeStartRegister);
707         __ LoadCodeObjectEntry(reg, reg);
708         __ call(reg);
709       }
710       RecordCallPosition(instr);
711       frame_access_state()->ClearSPDelta();
712       break;
713     }
714     case kArchCallBuiltinPointer: {
715       DCHECK(!HasImmediateInput(instr, 0));
716       Register builtin_index = i.InputRegister(0);
717       __ CallBuiltinByIndex(builtin_index);
718       RecordCallPosition(instr);
719       frame_access_state()->ClearSPDelta();
720       break;
721     }
722 #if V8_ENABLE_WEBASSEMBLY
723     case kArchCallWasmFunction: {
724       if (HasImmediateInput(instr, 0)) {
725         Constant constant = i.ToConstant(instr->InputAt(0));
726         Address wasm_code = static_cast<Address>(constant.ToInt32());
727         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
728           __ wasm_call(wasm_code, constant.rmode());
729         } else {
730           __ call(wasm_code, constant.rmode());
731         }
732       } else {
733         __ call(i.InputRegister(0));
734       }
735       RecordCallPosition(instr);
736       frame_access_state()->ClearSPDelta();
737       break;
738     }
739     case kArchTailCallWasm: {
740       if (HasImmediateInput(instr, 0)) {
741         Constant constant = i.ToConstant(instr->InputAt(0));
742         Address wasm_code = static_cast<Address>(constant.ToInt32());
743         __ jmp(wasm_code, constant.rmode());
744       } else {
745         __ jmp(i.InputRegister(0));
746       }
747       frame_access_state()->ClearSPDelta();
748       frame_access_state()->SetFrameAccessToDefault();
749       break;
750     }
751 #endif  // V8_ENABLE_WEBASSEMBLY
752     case kArchTailCallCodeObject: {
753       if (HasImmediateInput(instr, 0)) {
754         Handle<Code> code = i.InputCode(0);
755         __ Jump(code, RelocInfo::CODE_TARGET);
756       } else {
757         Register reg = i.InputRegister(0);
758         DCHECK_IMPLIES(
759             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
760             reg == kJavaScriptCallCodeStartRegister);
761         __ LoadCodeObjectEntry(reg, reg);
762         __ jmp(reg);
763       }
764       frame_access_state()->ClearSPDelta();
765       frame_access_state()->SetFrameAccessToDefault();
766       break;
767     }
768     case kArchTailCallAddress: {
769       CHECK(!HasImmediateInput(instr, 0));
770       Register reg = i.InputRegister(0);
771       DCHECK_IMPLIES(
772           instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
773           reg == kJavaScriptCallCodeStartRegister);
774       __ jmp(reg);
775       frame_access_state()->ClearSPDelta();
776       frame_access_state()->SetFrameAccessToDefault();
777       break;
778     }
779     case kArchCallJSFunction: {
780       Register func = i.InputRegister(0);
781       if (FLAG_debug_code) {
782         // Check the function's context matches the context argument.
783         __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
784         __ Assert(equal, AbortReason::kWrongFunctionContext);
785       }
786       static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
787       __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
788       __ CallCodeObject(ecx);
789       RecordCallPosition(instr);
790       frame_access_state()->ClearSPDelta();
791       break;
792     }
793     case kArchPrepareCallCFunction: {
794       // Frame alignment requires using FP-relative frame addressing.
795       frame_access_state()->SetFrameAccessToFP();
796       int const num_gp_parameters = ParamField::decode(instr->opcode());
797       int const num_fp_parameters = FPParamField::decode(instr->opcode());
798       __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters,
799                               i.TempRegister(0));
800       break;
801     }
802     case kArchSaveCallerRegisters: {
803       fp_mode_ =
804           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
805       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
806              fp_mode_ == SaveFPRegsMode::kSave);
807       // kReturnRegister0 should have been saved before entering the stub.
808       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
809       DCHECK(IsAligned(bytes, kSystemPointerSize));
810       DCHECK_EQ(0, frame_access_state()->sp_delta());
811       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
812       DCHECK(!caller_registers_saved_);
813       caller_registers_saved_ = true;
814       break;
815     }
816     case kArchRestoreCallerRegisters: {
817       DCHECK(fp_mode_ ==
818              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
819       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
820              fp_mode_ == SaveFPRegsMode::kSave);
821       // Don't overwrite the returned value.
822       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
823       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
824       DCHECK_EQ(0, frame_access_state()->sp_delta());
825       DCHECK(caller_registers_saved_);
826       caller_registers_saved_ = false;
827       break;
828     }
829     case kArchPrepareTailCall:
830       AssemblePrepareTailCall();
831       break;
832     case kArchCallCFunction: {
833       int const num_parameters = MiscField::decode(instr->opcode());
834       Label return_location;
835 #if V8_ENABLE_WEBASSEMBLY
836       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
837         // Put the return address in a stack slot.
838         Register scratch = eax;
839         __ push(scratch);
840         __ PushPC();
841         int pc = __ pc_offset();
842         __ pop(scratch);
843         __ sub(scratch, Immediate(pc + Code::kHeaderSize - kHeapObjectTag));
844         __ add(scratch, Immediate::CodeRelativeOffset(&return_location));
845         __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset),
846                scratch);
847         __ pop(scratch);
848       }
849 #endif  // V8_ENABLE_WEBASSEMBLY
850       if (HasImmediateInput(instr, 0)) {
851         ExternalReference ref = i.InputExternalReference(0);
852         __ CallCFunction(ref, num_parameters);
853       } else {
854         Register func = i.InputRegister(0);
855         __ CallCFunction(func, num_parameters);
856       }
857       __ bind(&return_location);
858 #if V8_ENABLE_WEBASSEMBLY
859       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
860         RecordSafepoint(instr->reference_map());
861       }
862 #endif  // V8_ENABLE_WEBASSEMBLY
863       frame_access_state()->SetFrameAccessToDefault();
864       // Ideally, we should decrement SP delta to match the change of stack
865       // pointer in CallCFunction. However, for certain architectures (e.g.
866       // ARM), there may be more strict alignment requirement, causing old SP
867       // to be saved on the stack. In those cases, we can not calculate the SP
868       // delta statically.
869       frame_access_state()->ClearSPDelta();
870       if (caller_registers_saved_) {
871         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
872         // Here, we assume the sequence to be:
873         //   kArchSaveCallerRegisters;
874         //   kArchCallCFunction;
875         //   kArchRestoreCallerRegisters;
876         int bytes =
877             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
878         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
879       }
880       break;
881     }
882     case kArchJmp:
883       AssembleArchJump(i.InputRpo(0));
884       break;
885     case kArchBinarySearchSwitch:
886       AssembleArchBinarySearchSwitch(instr);
887       break;
888     case kArchTableSwitch:
889       AssembleArchTableSwitch(instr);
890       break;
891     case kArchComment:
892       __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
893       break;
894     case kArchAbortCSADcheck:
895       DCHECK(i.InputRegister(0) == edx);
896       {
897         // We don't actually want to generate a pile of code for this, so just
898         // claim there is a stack frame, without generating one.
899         FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
900         __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
901                 RelocInfo::CODE_TARGET);
902       }
903       __ int3();
904       break;
905     case kArchDebugBreak:
906       __ DebugBreak();
907       break;
908     case kArchNop:
909     case kArchThrowTerminator:
910       // don't emit code for nops.
911       break;
912     case kArchDeoptimize: {
913       DeoptimizationExit* exit =
914           BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
915       __ jmp(exit->label());
916       break;
917     }
918     case kArchRet:
919       AssembleReturn(instr->InputAt(0));
920       break;
921     case kArchFramePointer:
922       __ mov(i.OutputRegister(), ebp);
923       break;
924     case kArchParentFramePointer:
925       if (frame_access_state()->has_frame()) {
926         __ mov(i.OutputRegister(), Operand(ebp, 0));
927       } else {
928         __ mov(i.OutputRegister(), ebp);
929       }
930       break;
931     case kArchStackPointerGreaterThan: {
932       // Potentially apply an offset to the current stack pointer before the
933       // comparison to consider the size difference of an optimized frame versus
934       // the contained unoptimized frames.
935       Register lhs_register = esp;
936       uint32_t offset;
937 
938       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
939         lhs_register = i.TempRegister(0);
940         __ lea(lhs_register, Operand(esp, -1 * static_cast<int32_t>(offset)));
941       }
942 
943       constexpr size_t kValueIndex = 0;
944       if (HasAddressingMode(instr)) {
945         __ cmp(lhs_register, i.MemoryOperand(kValueIndex));
946       } else {
947         __ cmp(lhs_register, i.InputRegister(kValueIndex));
948       }
949       break;
950     }
951     case kArchStackCheckOffset:
952       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
953       break;
954     case kArchTruncateDoubleToI: {
955       auto result = i.OutputRegister();
956       auto input = i.InputDoubleRegister(0);
957       auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
958           this, result, input, DetermineStubCallMode());
959       __ cvttsd2si(result, Operand(input));
960       __ cmp(result, 1);
961       __ j(overflow, ool->entry());
962       __ bind(ool->exit());
963       break;
964     }
965     case kArchStoreWithWriteBarrier:  // Fall through.
966     case kArchAtomicStoreWithWriteBarrier: {
967       RecordWriteMode mode =
968           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
969       Register object = i.InputRegister(0);
970       size_t index = 0;
971       Operand operand = i.MemoryOperand(&index);
972       Register value = i.InputRegister(index);
973       Register scratch0 = i.TempRegister(0);
974       Register scratch1 = i.TempRegister(1);
975 
976       if (FLAG_debug_code) {
977         // Checking that |value| is not a cleared weakref: our write barrier
978         // does not support that for now.
979         __ cmp(value, Immediate(kClearedWeakHeapObjectLower32));
980         __ Check(not_equal, AbortReason::kOperandIsCleared);
981       }
982 
983       auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
984                                                    scratch0, scratch1, mode,
985                                                    DetermineStubCallMode());
986       if (arch_opcode == kArchStoreWithWriteBarrier) {
987         __ mov(operand, value);
988       } else {
989         __ mov(scratch0, value);
990         __ xchg(scratch0, operand);
991       }
992       if (mode > RecordWriteMode::kValueIsPointer) {
993         __ JumpIfSmi(value, ool->exit());
994       }
995       __ CheckPageFlag(object, scratch0,
996                        MemoryChunk::kPointersFromHereAreInterestingMask,
997                        not_zero, ool->entry());
998       __ bind(ool->exit());
999       break;
1000     }
1001     case kArchStackSlot: {
1002       FrameOffset offset =
1003           frame_access_state()->GetFrameOffset(i.InputInt32(0));
1004       Register base = offset.from_stack_pointer() ? esp : ebp;
1005       __ lea(i.OutputRegister(), Operand(base, offset.offset()));
1006       break;
1007     }
1008     case kIeee754Float64Acos:
1009       ASSEMBLE_IEEE754_UNOP(acos);
1010       break;
1011     case kIeee754Float64Acosh:
1012       ASSEMBLE_IEEE754_UNOP(acosh);
1013       break;
1014     case kIeee754Float64Asin:
1015       ASSEMBLE_IEEE754_UNOP(asin);
1016       break;
1017     case kIeee754Float64Asinh:
1018       ASSEMBLE_IEEE754_UNOP(asinh);
1019       break;
1020     case kIeee754Float64Atan:
1021       ASSEMBLE_IEEE754_UNOP(atan);
1022       break;
1023     case kIeee754Float64Atanh:
1024       ASSEMBLE_IEEE754_UNOP(atanh);
1025       break;
1026     case kIeee754Float64Atan2:
1027       ASSEMBLE_IEEE754_BINOP(atan2);
1028       break;
1029     case kIeee754Float64Cbrt:
1030       ASSEMBLE_IEEE754_UNOP(cbrt);
1031       break;
1032     case kIeee754Float64Cos:
1033       ASSEMBLE_IEEE754_UNOP(cos);
1034       break;
1035     case kIeee754Float64Cosh:
1036       ASSEMBLE_IEEE754_UNOP(cosh);
1037       break;
1038     case kIeee754Float64Expm1:
1039       ASSEMBLE_IEEE754_UNOP(expm1);
1040       break;
1041     case kIeee754Float64Exp:
1042       ASSEMBLE_IEEE754_UNOP(exp);
1043       break;
1044     case kIeee754Float64Log:
1045       ASSEMBLE_IEEE754_UNOP(log);
1046       break;
1047     case kIeee754Float64Log1p:
1048       ASSEMBLE_IEEE754_UNOP(log1p);
1049       break;
1050     case kIeee754Float64Log2:
1051       ASSEMBLE_IEEE754_UNOP(log2);
1052       break;
1053     case kIeee754Float64Log10:
1054       ASSEMBLE_IEEE754_UNOP(log10);
1055       break;
1056     case kIeee754Float64Pow:
1057       ASSEMBLE_IEEE754_BINOP(pow);
1058       break;
1059     case kIeee754Float64Sin:
1060       ASSEMBLE_IEEE754_UNOP(sin);
1061       break;
1062     case kIeee754Float64Sinh:
1063       ASSEMBLE_IEEE754_UNOP(sinh);
1064       break;
1065     case kIeee754Float64Tan:
1066       ASSEMBLE_IEEE754_UNOP(tan);
1067       break;
1068     case kIeee754Float64Tanh:
1069       ASSEMBLE_IEEE754_UNOP(tanh);
1070       break;
1071     case kIA32Add:
1072       ASSEMBLE_BINOP(add);
1073       break;
1074     case kIA32And:
1075       ASSEMBLE_BINOP(and_);
1076       break;
1077     case kIA32Cmp:
1078       ASSEMBLE_COMPARE(cmp);
1079       break;
1080     case kIA32Cmp16:
1081       ASSEMBLE_COMPARE(cmpw);
1082       break;
1083     case kIA32Cmp8:
1084       ASSEMBLE_COMPARE(cmpb);
1085       break;
1086     case kIA32Test:
1087       ASSEMBLE_COMPARE(test);
1088       break;
1089     case kIA32Test16:
1090       ASSEMBLE_COMPARE(test_w);
1091       break;
1092     case kIA32Test8:
1093       ASSEMBLE_COMPARE(test_b);
1094       break;
1095     case kIA32Imul:
1096       if (HasImmediateInput(instr, 1)) {
1097         __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
1098       } else {
1099         __ imul(i.OutputRegister(), i.InputOperand(1));
1100       }
1101       break;
1102     case kIA32ImulHigh:
1103       __ imul(i.InputRegister(1));
1104       break;
1105     case kIA32UmulHigh:
1106       __ mul(i.InputRegister(1));
1107       break;
1108     case kIA32Idiv:
1109       __ cdq();
1110       __ idiv(i.InputOperand(1));
1111       break;
1112     case kIA32Udiv:
1113       __ Move(edx, Immediate(0));
1114       __ div(i.InputOperand(1));
1115       break;
1116     case kIA32Not:
1117       __ not_(i.OutputOperand());
1118       break;
1119     case kIA32Neg:
1120       __ neg(i.OutputOperand());
1121       break;
1122     case kIA32Or:
1123       ASSEMBLE_BINOP(or_);
1124       break;
1125     case kIA32Xor:
1126       ASSEMBLE_BINOP(xor_);
1127       break;
1128     case kIA32Sub:
1129       ASSEMBLE_BINOP(sub);
1130       break;
1131     case kIA32Shl:
1132       if (HasImmediateInput(instr, 1)) {
1133         __ shl(i.OutputOperand(), i.InputInt5(1));
1134       } else {
1135         __ shl_cl(i.OutputOperand());
1136       }
1137       break;
1138     case kIA32Shr:
1139       if (HasImmediateInput(instr, 1)) {
1140         __ shr(i.OutputOperand(), i.InputInt5(1));
1141       } else {
1142         __ shr_cl(i.OutputOperand());
1143       }
1144       break;
1145     case kIA32Sar:
1146       if (HasImmediateInput(instr, 1)) {
1147         __ sar(i.OutputOperand(), i.InputInt5(1));
1148       } else {
1149         __ sar_cl(i.OutputOperand());
1150       }
1151       break;
1152     case kIA32AddPair: {
1153       // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1154       // i.InputRegister(1) ... left high word.
1155       // i.InputRegister(2) ... right low word.
1156       // i.InputRegister(3) ... right high word.
1157       bool use_temp = false;
1158       if ((HasRegisterInput(instr, 1) &&
1159            i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1160           i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1161         // We cannot write to the output register directly, because it would
1162         // overwrite an input for adc. We have to use the temp register.
1163         use_temp = true;
1164         __ Move(i.TempRegister(0), i.InputRegister(0));
1165         __ add(i.TempRegister(0), i.InputRegister(2));
1166       } else {
1167         __ add(i.OutputRegister(0), i.InputRegister(2));
1168       }
1169       i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1170                                          instr->InputAt(1));
1171       __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
1172       if (use_temp) {
1173         __ Move(i.OutputRegister(0), i.TempRegister(0));
1174       }
1175       break;
1176     }
1177     case kIA32SubPair: {
1178       // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1179       // i.InputRegister(1) ... left high word.
1180       // i.InputRegister(2) ... right low word.
1181       // i.InputRegister(3) ... right high word.
1182       bool use_temp = false;
1183       if ((HasRegisterInput(instr, 1) &&
1184            i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1185           i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1186         // We cannot write to the output register directly, because it would
1187         // overwrite an input for sbb. We have to use the temp register.
1188         use_temp = true;
1189         __ Move(i.TempRegister(0), i.InputRegister(0));
1190         __ sub(i.TempRegister(0), i.InputRegister(2));
1191       } else {
1192         __ sub(i.OutputRegister(0), i.InputRegister(2));
1193       }
1194       i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1195                                          instr->InputAt(1));
1196       __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
1197       if (use_temp) {
1198         __ Move(i.OutputRegister(0), i.TempRegister(0));
1199       }
1200       break;
1201     }
1202     case kIA32MulPair: {
1203       __ imul(i.OutputRegister(1), i.InputOperand(0));
1204       i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
1205       __ imul(i.TempRegister(0), i.InputOperand(2));
1206       __ add(i.OutputRegister(1), i.TempRegister(0));
1207       __ mov(i.OutputRegister(0), i.InputOperand(0));
1208       // Multiplies the low words and stores them in eax and edx.
1209       __ mul(i.InputRegister(2));
1210       __ add(i.OutputRegister(1), i.TempRegister(0));
1211 
1212       break;
1213     }
1214     case kIA32ShlPair:
1215       if (HasImmediateInput(instr, 2)) {
1216         __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1217       } else {
1218         // Shift has been loaded into CL by the register allocator.
1219         __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
1220       }
1221       break;
1222     case kIA32ShrPair:
1223       if (HasImmediateInput(instr, 2)) {
1224         __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1225       } else {
1226         // Shift has been loaded into CL by the register allocator.
1227         __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
1228       }
1229       break;
1230     case kIA32SarPair:
1231       if (HasImmediateInput(instr, 2)) {
1232         __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1233       } else {
1234         // Shift has been loaded into CL by the register allocator.
1235         __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
1236       }
1237       break;
1238     case kIA32Rol:
1239       if (HasImmediateInput(instr, 1)) {
1240         __ rol(i.OutputOperand(), i.InputInt5(1));
1241       } else {
1242         __ rol_cl(i.OutputOperand());
1243       }
1244       break;
1245     case kIA32Ror:
1246       if (HasImmediateInput(instr, 1)) {
1247         __ ror(i.OutputOperand(), i.InputInt5(1));
1248       } else {
1249         __ ror_cl(i.OutputOperand());
1250       }
1251       break;
1252     case kIA32Lzcnt:
1253       __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
1254       break;
1255     case kIA32Tzcnt:
1256       __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
1257       break;
1258     case kIA32Popcnt:
1259       __ Popcnt(i.OutputRegister(), i.InputOperand(0));
1260       break;
1261     case kIA32Bswap:
1262       __ bswap(i.OutputRegister());
1263       break;
1264     case kIA32MFence:
1265       __ mfence();
1266       break;
1267     case kIA32LFence:
1268       __ lfence();
1269       break;
1270     case kIA32Float32Cmp:
1271       __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1272       break;
1273     case kIA32Float32Sqrt:
1274       __ Sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
1275       break;
1276     case kIA32Float32Round: {
1277       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1278       RoundingMode const mode =
1279           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1280       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1281       break;
1282     }
1283     case kIA32Float64Cmp:
1284       __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1285       break;
1286     case kIA32Float32Max: {
1287       Label compare_swap, done_compare;
1288       if (instr->InputAt(1)->IsFPRegister()) {
1289         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1290       } else {
1291         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1292       }
1293       auto ool =
1294           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1295       __ j(parity_even, ool->entry());
1296       __ j(above, &done_compare, Label::kNear);
1297       __ j(below, &compare_swap, Label::kNear);
1298       __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
1299       __ test(i.TempRegister(0), Immediate(1));
1300       __ j(zero, &done_compare, Label::kNear);
1301       __ bind(&compare_swap);
1302       if (instr->InputAt(1)->IsFPRegister()) {
1303         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1304       } else {
1305         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1306       }
1307       __ bind(&done_compare);
1308       __ bind(ool->exit());
1309       break;
1310     }
1311 
1312     case kIA32Float64Max: {
1313       Label compare_swap, done_compare;
1314       if (instr->InputAt(1)->IsFPRegister()) {
1315         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1316       } else {
1317         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1318       }
1319       auto ool =
1320           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1321       __ j(parity_even, ool->entry());
1322       __ j(above, &done_compare, Label::kNear);
1323       __ j(below, &compare_swap, Label::kNear);
1324       __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
1325       __ test(i.TempRegister(0), Immediate(1));
1326       __ j(zero, &done_compare, Label::kNear);
1327       __ bind(&compare_swap);
1328       if (instr->InputAt(1)->IsFPRegister()) {
1329         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1330       } else {
1331         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1332       }
1333       __ bind(&done_compare);
1334       __ bind(ool->exit());
1335       break;
1336     }
1337     case kIA32Float32Min: {
1338       Label compare_swap, done_compare;
1339       if (instr->InputAt(1)->IsFPRegister()) {
1340         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1341       } else {
1342         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1343       }
1344       auto ool =
1345           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1346       __ j(parity_even, ool->entry());
1347       __ j(below, &done_compare, Label::kNear);
1348       __ j(above, &compare_swap, Label::kNear);
1349       if (instr->InputAt(1)->IsFPRegister()) {
1350         __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
1351       } else {
1352         __ Movss(kScratchDoubleReg, i.InputOperand(1));
1353         __ Movmskps(i.TempRegister(0), kScratchDoubleReg);
1354       }
1355       __ test(i.TempRegister(0), Immediate(1));
1356       __ j(zero, &done_compare, Label::kNear);
1357       __ bind(&compare_swap);
1358       if (instr->InputAt(1)->IsFPRegister()) {
1359         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1360       } else {
1361         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1362       }
1363       __ bind(&done_compare);
1364       __ bind(ool->exit());
1365       break;
1366     }
1367     case kIA32Float64Min: {
1368       Label compare_swap, done_compare;
1369       if (instr->InputAt(1)->IsFPRegister()) {
1370         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1371       } else {
1372         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1373       }
1374       auto ool =
1375           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1376       __ j(parity_even, ool->entry());
1377       __ j(below, &done_compare, Label::kNear);
1378       __ j(above, &compare_swap, Label::kNear);
1379       if (instr->InputAt(1)->IsFPRegister()) {
1380         __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
1381       } else {
1382         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1383         __ Movmskpd(i.TempRegister(0), kScratchDoubleReg);
1384       }
1385       __ test(i.TempRegister(0), Immediate(1));
1386       __ j(zero, &done_compare, Label::kNear);
1387       __ bind(&compare_swap);
1388       if (instr->InputAt(1)->IsFPRegister()) {
1389         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1390       } else {
1391         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1392       }
1393       __ bind(&done_compare);
1394       __ bind(ool->exit());
1395       break;
1396     }
1397     case kIA32Float64Mod: {
1398       Register tmp = i.TempRegister(1);
1399       __ mov(tmp, esp);
1400       __ AllocateStackSpace(kDoubleSize);
1401       __ and_(esp, -8);  // align to 8 byte boundary.
1402       // Move values to st(0) and st(1).
1403       __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1404       __ fld_d(Operand(esp, 0));
1405       __ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1406       __ fld_d(Operand(esp, 0));
1407       // Loop while fprem isn't done.
1408       Label mod_loop;
1409       __ bind(&mod_loop);
1410       // This instruction traps on all kinds of inputs, but we are assuming the
1411       // floating point control word is set to ignore them all.
1412       __ fprem();
1413       // fnstsw_ax clobbers eax.
1414       DCHECK_EQ(eax, i.TempRegister(0));
1415       __ fnstsw_ax();
1416       __ sahf();
1417       __ j(parity_even, &mod_loop);
1418       // Move output to stack and clean up.
1419       __ fstp(1);
1420       __ fstp_d(Operand(esp, 0));
1421       __ Movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1422       __ mov(esp, tmp);
1423       break;
1424     }
    case kIA32Float64Sqrt:
      __ Sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Float64Round: {
      // The rounding mode (nearest / floor / ceil / truncate) is encoded in
      // the instruction's MiscField.
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32Float32ToFloat64:
      __ Cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Float64ToFloat32:
      __ Cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Float32ToInt32:
      __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Float32ToUint32:
      // The unsigned truncation helper requires a SIMD scratch register.
      __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
                   i.TempSimd128Register(0));
      break;
    case kIA32Float64ToInt32:
      __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Float64ToUint32:
      __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
                   i.TempSimd128Register(0));
      break;
    case kSSEInt32ToFloat32:
      // Calling Cvtsi2ss (which does a xor) regresses some benchmarks.
      __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Uint32ToFloat32:
      // The unsigned conversion helper requires a GP scratch register.
      __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
                  i.TempRegister(0));
      break;
    case kSSEInt32ToFloat64:
      // Calling Cvtsi2sd (which does a xor) regresses some benchmarks.
      __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Uint32ToFloat64:
      __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
                  i.TempRegister(0));
      break;
    case kIA32Float64ExtractLowWord32:
      // Low 32 bits of a float64: read directly from the stack slot, or move
      // the low dword out of the XMM register.
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ mov(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kIA32Float64ExtractHighWord32:
      if (instr->InputAt(0)->IsFPStackSlot()) {
        // The high word lives kDoubleSize / 2 bytes into the stack slot.
        __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
      } else {
        __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
      }
      break;
    case kIA32Float64InsertLowWord32:
      __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
      break;
    case kIA32Float64InsertHighWord32:
      __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
      break;
    case kIA32Float64LoadLowWord32:
      __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kFloat32Add: {
      __ Addss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat32Sub: {
      __ Subss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat32Mul: {
      __ Mulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat32Div: {
      __ Divss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    }
    case kFloat64Add: {
      __ Addsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat64Sub: {
      __ Subsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat64Mul: {
      __ Mulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kFloat64Div: {
      __ Divsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    }
    case kFloat32Abs: {
      // Scalar abs/neg are implemented with packed sign-bit ops; the helper
      // needs a GP scratch register.
      __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.TempRegister(0));
      break;
    }
    case kFloat32Neg: {
      __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.TempRegister(0));
      break;
    }
    case kFloat64Abs: {
      __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.TempRegister(0));
      break;
    }
    case kFloat64Neg: {
      __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.TempRegister(0));
      break;
    }
    case kIA32Float64SilenceNaN:
      // Subtract +0.0 to quiet a signaling NaN. NOTE(review): the result is
      // written into input register 0 — presumably the instruction selector
      // constrains output == input here; confirm against the selector.
      __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
      break;
    case kIA32Movsxbl:
      ASSEMBLE_MOVX(movsx_b);
      break;
    case kIA32Movzxbl:
      ASSEMBLE_MOVX(movzx_b);
      break;
    case kIA32Movb: {
      // Byte store; the value is the input following the memory operand and
      // may be an immediate or a register.
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_b(operand, i.InputInt8(index));
      } else {
        __ mov_b(operand, i.InputRegister(index));
      }
      break;
    }
    case kIA32Movsxwl:
      ASSEMBLE_MOVX(movsx_w);
      break;
    case kIA32Movzxwl:
      ASSEMBLE_MOVX(movzx_w);
      break;
    case kIA32Movw: {
      // 16-bit store, analogous to kIA32Movb above.
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_w(operand, i.InputInt16(index));
      } else {
        __ mov_w(operand, i.InputRegister(index));
      }
      break;
    }
    case kIA32Movl:
      // 32-bit move: a load if the instruction has an output, otherwise a
      // store of an immediate or register.
      if (instr->HasOutput()) {
        __ mov(i.OutputRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        if (HasImmediateInput(instr, index)) {
          __ mov(operand, i.InputImmediate(index));
        } else {
          __ mov(operand, i.InputRegister(index));
        }
      }
      break;
    case kIA32Movsd:
      if (instr->HasOutput()) {
        __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movsd(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movss:
      if (instr->HasOutput()) {
        __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movss(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movdqu:
      // Unaligned 128-bit SIMD load/store.
      if (instr->HasOutput()) {
        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movdqu(operand, i.InputSimd128Register(index));
      }
      break;
    case kIA32BitcastFI:
      // Reinterpret float bits as an integer (no conversion).
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ mov(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kIA32BitcastIF:
      // Reinterpret integer bits as a float (no conversion).
      if (HasRegisterInput(instr, 0)) {
        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kIA32Lea: {
      AddressingMode mode = AddressingModeField::decode(instr->opcode());
      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
      // and addressing mode just happens to work out. The "addl"/"subl" forms
      // in these cases are faster based on measurements.
      if (mode == kMode_MI) {
        // Pure constant: just materialize the immediate.
        __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
      } else if (i.InputRegister(0) == i.OutputRegister()) {
        if (mode == kMode_MRI) {
          // base + constant, base already in the output register.
          int32_t constant_summand = i.InputInt32(1);
          if (constant_summand > 0) {
            __ add(i.OutputRegister(), Immediate(constant_summand));
          } else if (constant_summand < 0) {
            // Negate with wraparound so INT32_MIN doesn't overflow (UB).
            __ sub(i.OutputRegister(),
                   Immediate(base::NegateWithWraparound(constant_summand)));
          }
        } else if (mode == kMode_M1) {
          if (i.InputRegister(1) == i.OutputRegister()) {
            // reg + reg with both equal to output: reg * 2 == reg << 1.
            __ shl(i.OutputRegister(), 1);
          } else {
            __ add(i.OutputRegister(), i.InputRegister(1));
          }
        } else if (mode == kMode_M2) {
          __ shl(i.OutputRegister(), 1);
        } else if (mode == kMode_M4) {
          __ shl(i.OutputRegister(), 2);
        } else if (mode == kMode_M8) {
          __ shl(i.OutputRegister(), 3);
        } else {
          __ lea(i.OutputRegister(), i.MemoryOperand());
        }
      } else if (mode == kMode_MR1 &&
                 i.InputRegister(1) == i.OutputRegister()) {
        // index already in the output register: fold to a single add.
        __ add(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ lea(i.OutputRegister(), i.MemoryOperand());
      }
      break;
    }
    case kIA32Push: {
      // Input 0 is the total stack decrement in bytes (slot plus any
      // padding); input 1 is the value to push.
      int stack_decrement = i.InputInt32(0);
      int slots = stack_decrement / kSystemPointerSize;
      // Whenever codegen uses push, we need to check if stack_decrement
      // contains any extra padding and adjust the stack before the push.
      if (HasImmediateInput(instr, 1)) {
        __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
        __ push(i.InputImmediate(1));
      } else if (HasAddressingMode(instr)) {
        // Only single slot pushes from memory are supported.
        __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
        size_t index = 1;
        Operand operand = i.MemoryOperand(&index);
        __ push(operand);
      } else {
        InstructionOperand* input = instr->InputAt(1);
        if (input->IsRegister()) {
          __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
          __ push(i.InputRegister(1));
        } else if (input->IsFloatRegister()) {
          // FP values can't be pushed directly: allocate the full decrement,
          // then store to the stack top.
          DCHECK_GE(stack_decrement, kFloatSize);
          __ AllocateStackSpace(stack_decrement);
          __ Movss(Operand(esp, 0), i.InputDoubleRegister(1));
        } else if (input->IsDoubleRegister()) {
          DCHECK_GE(stack_decrement, kDoubleSize);
          __ AllocateStackSpace(stack_decrement);
          __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
        } else if (input->IsSimd128Register()) {
          DCHECK_GE(stack_decrement, kSimd128Size);
          __ AllocateStackSpace(stack_decrement);
          // TODO(bbudge) Use Movaps when slots are aligned.
          __ Movups(Operand(esp, 0), i.InputSimd128Register(1));
        } else if (input->IsStackSlot() || input->IsFloatStackSlot()) {
          __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
          __ push(i.InputOperand(1));
        } else if (input->IsDoubleStackSlot()) {
          // Load into the scratch register BEFORE moving esp: the source
          // operand may be esp-relative.
          DCHECK_GE(stack_decrement, kDoubleSize);
          __ Movsd(kScratchDoubleReg, i.InputOperand(1));
          __ AllocateStackSpace(stack_decrement);
          __ Movsd(Operand(esp, 0), kScratchDoubleReg);
        } else {
          DCHECK(input->IsSimd128StackSlot());
          DCHECK_GE(stack_decrement, kSimd128Size);
          // TODO(bbudge) Use Movaps when slots are aligned.
          __ Movups(kScratchDoubleReg, i.InputOperand(1));
          __ AllocateStackSpace(stack_decrement);
          __ Movups(Operand(esp, 0), kScratchDoubleReg);
        }
      }
      // Keep the frame-tracking in sync with the emitted stack adjustment.
      frame_access_state()->IncreaseSPDelta(slots);
      break;
    }
    case kIA32Poke: {
      // Store into an already-reserved stack slot; the slot index is encoded
      // in the opcode's MiscField.
      int slot = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
      } else {
        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
      }
      break;
    }
    case kIA32Peek: {
      // Load from a caller frame slot, addressed ebp-relative via the
      // reverse slot index.
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ Movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
        } else if (op->representation() == MachineRepresentation::kFloat32) {
          __ Movss(i.OutputFloatRegister(), Operand(ebp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
          __ Movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
        }
      } else {
        __ mov(i.OutputRegister(), Operand(ebp, offset));
      }
      break;
    }
    case kIA32F64x2Splat: {
      __ Movddup(i.OutputSimd128Register(), i.InputDoubleRegister(0));
      break;
    }
    case kIA32F64x2ExtractLane: {
      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
                          i.InputUint8(1));
      break;
    }
    case kIA32F64x2ReplaceLane: {
      // Input 1 is the lane index, input 2 the replacement value.
      __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputDoubleRegister(2), i.InputInt8(1));
      break;
    }
    case kIA32F64x2Sqrt: {
      __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F64x2Add: {
      __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Sub: {
      __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Mul: {
      __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Div: {
      __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Min: {
      // NOTE(review): F64x2Min/Max are macro-assembler helpers, distinct from
      // the raw Minpd/Maxpd cases below — presumably they implement Wasm
      // NaN/signed-zero semantics; confirm in the TurboAssembler.
      __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Max: {
      __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Eq: {
      __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Ne: {
      __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32F64x2Lt: {
      __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Le: {
      __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Qfma: {
      __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Qfms: {
      __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32Minpd: {
      __ Minpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32Maxpd: {
      __ Maxpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32F64x2Round: {
      // Rounding mode comes from the opcode's MiscField, as in the scalar
      // rounding cases.
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32F64x2PromoteLowF32x4: {
      if (HasAddressingMode(instr)) {
        __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      }
      break;
    }
    case kIA32F32x4DemoteF64x2Zero: {
      __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4TruncSatF64x2SZero: {
      __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
                                 i.InputSimd128Register(0), kScratchDoubleReg,
                                 i.TempRegister(0));
      break;
    }
    case kIA32I32x4TruncSatF64x2UZero: {
      __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
                                 i.InputSimd128Register(0), kScratchDoubleReg,
                                 i.TempRegister(0));
      break;
    }
    case kIA32F64x2ConvertLowI32x4S: {
      __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32F64x2ConvertLowI32x4U: {
      __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
                               i.InputSimd128Register(0), i.TempRegister(0));
      break;
    }
    // Extended-multiply cases: multiply the low or high half of the narrower
    // lanes into lanes of twice the width. The low/high and signed/unsigned
    // variants all funnel into the same macro-assembler helper, selected by
    // the trailing bool flags.
    case kIA32I64x2ExtMulLowI32x4S: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/true);
      break;
    }
    case kIA32I64x2ExtMulHighI32x4S: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/true);
      break;
    }
    case kIA32I64x2ExtMulLowI32x4U: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/false);
      break;
    }
    case kIA32I64x2ExtMulHighI32x4U: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/false);
      break;
    }
    case kIA32I32x4ExtMulLowI16x8S: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/true);
      break;
    }
    case kIA32I32x4ExtMulHighI16x8S: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/true);
      break;
    }
    case kIA32I32x4ExtMulLowI16x8U: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/false);
      break;
    }
    case kIA32I32x4ExtMulHighI16x8U: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/false);
      break;
    }
    // The I16x8 variants use distinct helpers for the high half rather than a
    // bool flag.
    case kIA32I16x8ExtMulLowI8x16S: {
      __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
                        i.InputSimd128Register(1), kScratchDoubleReg,
                        /*is_signed=*/true);
      break;
    }
    case kIA32I16x8ExtMulHighI8x16S: {
      __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ExtMulLowI8x16U: {
      __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
                        i.InputSimd128Register(1), kScratchDoubleReg,
                        /*is_signed=*/false);
      break;
    }
    case kIA32I16x8ExtMulHighI8x16U: {
      __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2SplatI32Pair: {
      // Build one 64-bit value from two 32-bit inputs in the low qword, then
      // Pshufd 0x44 (dword order 0,1,0,1) copies it to both lanes.
      XMMRegister dst = i.OutputSimd128Register();
      __ Pinsrd(dst, i.InputRegister(0), 0);
      __ Pinsrd(dst, i.InputOperand(1), 1);
      __ Pshufd(dst, dst, uint8_t{0x44});
      break;
    }
    case kIA32I64x2ReplaceLaneI32Pair: {
      // A 64-bit lane is replaced as two 32-bit inserts at dword indices
      // lane*2 and lane*2+1.
      int8_t lane = i.InputInt8(1);
      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), lane * 2);
      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(3), lane * 2 + 1);
      break;
    }
    case kIA32I64x2Abs: {
      __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  kScratchDoubleReg);
      break;
    }
    case kIA32I64x2Neg: {
      __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  kScratchDoubleReg);
      break;
    }
    case kIA32I64x2Shl: {
      // The second macro argument is presumably the width of the shift-count
      // modulus in bits (2^6 == 64) — confirm against ASSEMBLE_SIMD_SHIFT.
      ASSEMBLE_SIMD_SHIFT(Psllq, 6);
      break;
    }
    case kIA32I64x2ShrS: {
      // There is no packed 64-bit arithmetic right shift on IA-32 SSE, so
      // this goes through a helper; the variable-shift form needs extra
      // temps.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (HasImmediateInput(instr, 1)) {
        __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
      } else {
        __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
                     i.TempSimd128Register(0), i.TempRegister(1));
      }
      break;
    }
    case kIA32I64x2Add: {
      __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I64x2Sub: {
      __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I64x2Mul: {
      __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), i.TempSimd128Register(0),
                  i.TempSimd128Register(1));
      break;
    }
    case kIA32I64x2ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
      break;
    }
    case kIA32I64x2BitMask: {
      __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2Eq: {
      __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I64x2Ne: {
      // Ne = Not(Eq): compare for equality, then xor with all-ones
      // (pcmpeqq of a register with itself) to invert the mask.
      __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2GtS: {
      __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2GeS: {
      __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2SConvertI32x4Low: {
      __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2SConvertI32x4High: {
      __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2UConvertI32x4Low: {
      __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2UConvertI32x4High: {
      __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I32x4ExtAddPairwiseI16x8S: {
      __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   i.TempRegister(0));
      break;
    }
    case kIA32I32x4ExtAddPairwiseI16x8U: {
      __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ExtAddPairwiseI8x16S: {
      __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0), kScratchDoubleReg,
                                   i.TempRegister(0));
      break;
    }
    case kIA32I16x8ExtAddPairwiseI8x16U: {
      __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   i.TempRegister(0));
      break;
    }
    case kIA32I16x8Q15MulRSatS: {
      __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Splat: {
      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
      break;
    }
    case kIA32F32x4ExtractLane: {
      __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
                          i.InputUint8(1));
      break;
    }
    case kIA32Insertps: {
      // The target lane index goes in bits [5:4] of the insertps immediate,
      // hence the << 4.
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputOperand(2), i.InputInt8(1) << 4);
      } else {
        // Non-AVX insertps is destructive: dst must alias input 0.
        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
        CpuFeatureScope sse_scope(tasm(), SSE4_1);
        __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
                    i.InputInt8(1) << 4);
      }
      break;
    }
    case kIA32F32x4SConvertI32x4: {
      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4UConvertI32x4: {
      // There is no packed unsigned int->float conversion, so split each
      // lane into a low-16-bit part (converted exactly) and a high part
      // (halved to fit the signed range, converted, then doubled), and sum.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
      __ Pblendw(kScratchDoubleReg, src, uint8_t{0x55});  // get lo 16 bits
      __ Psubd(dst, src, kScratchDoubleReg);              // get hi 16 bits
      __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ Psrld(dst, dst, byte{1});  // divide by 2 to get in unsigned range
      __ Cvtdq2ps(dst, dst);    // convert hi exactly
      __ Addps(dst, dst, dst);  // double hi, exactly
      __ Addps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kIA32F32x4Sqrt: {
      __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32F32x4RecipApprox: {
      __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipSqrtApprox: {
      __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
2149     case kIA32F32x4Add: {
2150       __ Addps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2151                i.InputOperand(1));
2152       break;
2153     };
    case kIA32F32x4Sub: {
      __ Subps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Mul: {
      __ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Div: {
      __ Divps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Min: {
      // NOTE(review): F32x4Min/Max are macro-assembler helpers, distinct from
      // the raw Minps/Maxps cases below — presumably they implement Wasm
      // NaN/signed-zero semantics; confirm in the TurboAssembler.
      __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Max: {
      __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Eq: {
      __ Cmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Ne: {
      __ Cmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32F32x4Lt: {
      __ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Le: {
      __ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Qfma: {
      __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Qfms: {
      __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32Minps: {
      __ Minps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32Maxps: {
      __ Maxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32F32x4Round: {
      // Rounding mode comes from the opcode's MiscField, as in the scalar
      // rounding cases.
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32I32x4Splat: {
      // Move the scalar into lane 0, then broadcast it with a shuffle
      // (imm 0x0 selects dword 0 for every lane).
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshufd(dst, dst, uint8_t{0x0});
      break;
    }
    case kIA32I32x4ExtractLane: {
      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
      break;
    }
    case kIA32I32x4SConvertF32x4: {
      __ I32x4SConvertF32x4(i.OutputSimd128Register(),
                            i.InputSimd128Register(0), kScratchDoubleReg,
                            i.TempRegister(0));
      break;
    }
    case kIA32I32x4SConvertI16x8Low: {
      __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4SConvertI16x8High: {
      __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In-place: psignd with an all-ones mask negates every lane.
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignd(dst, kScratchDoubleReg);
      } else {
        // Otherwise compute 0 - src.
        __ Pxor(dst, dst);
        __ Psubd(dst, src);
      }
      break;
    }
    case kIA32I32x4Shl: {
      ASSEMBLE_SIMD_SHIFT(Pslld, 5);
      break;
    }
    case kIA32I32x4ShrS: {
      ASSEMBLE_SIMD_SHIFT(Psrad, 5);
      break;
    }
    case kIA32I32x4Add: {
      __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I32x4Sub: {
      __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I32x4Mul: {
      __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4MinS: {
      __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
2292     case kIA32I32x4MaxS: {
2293       __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2294                 i.InputOperand(1));
2295       break;
2296     }
2297     case kIA32I32x4Eq: {
2298       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2299                  i.InputOperand(1));
2300       break;
2301     }
2302     case kIA32I32x4Ne: {
2303       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2304                  i.InputOperand(1));
2305       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2306       __ Pxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2307               kScratchDoubleReg);
2308       break;
2309     }
2310     case kIA32I32x4GtS: {
2311       __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2312                  i.InputOperand(1));
2313       break;
2314     }
2315     case kIA32I32x4GeS: {
2316       XMMRegister dst = i.OutputSimd128Register();
2317       XMMRegister src1 = i.InputSimd128Register(0);
2318       XMMRegister src2 = i.InputSimd128Register(1);
2319       if (CpuFeatures::IsSupported(AVX)) {
2320         CpuFeatureScope avx_scope(tasm(), AVX);
2321         __ vpminsd(kScratchDoubleReg, src1, src2);
2322         __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2323       } else {
2324         DCHECK_EQ(dst, src1);
2325         CpuFeatureScope sse_scope(tasm(), SSE4_1);
2326         __ pminsd(dst, src2);
2327         __ pcmpeqd(dst, src2);
2328       }
2329       break;
2330     }
    // i32x4.trunc_sat_f32x4_u lowering (SSE4.1 variant). Unsigned saturating
    // float->int conversion has no single instruction; lanes above INT32_MAX
    // are converted in two halves and recombined.
    case kSSEI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.TempSimd128Register(0);
      // NAN->0, negative->0
      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ maxps(dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ movaps(tmp, dst);
      __ subps(tmp, kScratchDoubleReg);
      __ cmpleps(kScratchDoubleReg, tmp);
      __ cvttps2dq(tmp, tmp);
      __ xorps(tmp, kScratchDoubleReg);
      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ pmaxsd(tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ cvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ paddd(dst, tmp);
      break;
    }
    // AVX variant of the same unsigned saturating conversion sequence.
    case kAVXI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.TempSimd128Register(0);
      // NAN->0, negative->0
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vmaxps(dst, dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ vsubps(tmp, dst, kScratchDoubleReg);
      __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
      __ vcvttps2dq(tmp, tmp);
      __ vpxor(tmp, tmp, kScratchDoubleReg);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ vcvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ vpaddd(dst, dst, tmp);
      break;
    }
    case kIA32I32x4UConvertI16x8Low: {
      // Zero-extend the low four 16-bit lanes to 32-bit lanes.
      __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4UConvertI16x8High: {
      __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I32x4ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrld, 5);
      break;
    }
    case kIA32I32x4MinU: {
      __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4MaxU: {
      __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    // Unsigned greater-than has no direct compare:
    // src1 > src2  <=>  !(max(src1, src2) == src2).
    case kSSEI32x4GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxud(dst, src);
      __ pcmpeqd(dst, src);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    // src1 >= src2 (unsigned)  <=>  min(src1, src2) == src2.
    case kSSEI32x4GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminud(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I32x4Abs: {
      __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4BitMask: {
      // movmskps collects the sign bit of each 32-bit lane into the low
      // 4 bits of the output GP register.
      __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4DotI16x8S: {
      __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8Splat: {
      // Register and memory sources use different I16x8Splat overloads.
      if (instr->InputAt(0)->IsRegister()) {
        __ I16x8Splat(i.OutputSimd128Register(), i.InputRegister(0));
      } else {
        __ I16x8Splat(i.OutputSimd128Register(), i.InputOperand(0));
      }
      break;
    }
    case kIA32I16x8ExtractLaneS: {
      // pextrw zero-extends; sign-extend the 16-bit lane explicitly for the
      // signed extract.
      Register dst = i.OutputRegister();
      __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
      __ movsx_w(dst, dst);
      break;
    }
    case kIA32I16x8SConvertI8x16Low: {
      // Sign-extend the low eight 8-bit lanes to 16-bit lanes.
      __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8SConvertI8x16High: {
      __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I16x8Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In-place: psignw with all-ones (-1 per lane) negates each lane.
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignw(dst, kScratchDoubleReg);
      } else {
        // dst = 0 - src.
        __ Pxor(dst, dst);
        __ Psubw(dst, src);
      }
      break;
    }
    // Shift width argument 4 => shift count taken modulo 16.
    case kIA32I16x8Shl: {
      ASSEMBLE_SIMD_SHIFT(Psllw, 4);
      break;
    }
    case kIA32I16x8ShrS: {
      ASSEMBLE_SIMD_SHIFT(Psraw, 4);
      break;
    }
    case kIA32I16x8SConvertI32x4: {
      // Signed saturating narrow i32x4 -> i16x8.
      __ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I16x8Add: {
      __ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8AddSatS: {
      __ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Sub: {
      __ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8SubSatS: {
      __ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Mul: {
      __ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MinS: {
      __ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MaxS: {
      __ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Eq: {
      __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    // Not-equal: equality compare, then invert with an all-ones XOR.
    case kSSEI16x8Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI16x8Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kIA32I16x8GtS: {
      __ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    // src1 >= src2 (signed)  <=>  min(src1, src2) == src2.
    case kSSEI16x8GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8UConvertI8x16Low: {
      // Zero-extend the low eight 8-bit lanes to 16-bit lanes.
      __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8UConvertI8x16High: {
      __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
      break;
    }
    case kIA32I16x8UConvertI32x4: {
      // Unsigned saturating narrow i32x4 -> i16x8.
      __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kIA32I16x8AddSatU: {
      __ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8SubSatU: {
      __ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8MinU: {
      __ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MaxU: {
      __ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    // Unsigned greater-than: !(max(src1, src2) == src2).
    case kSSEI16x8GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxuw(dst, src);
      __ pcmpeqw(dst, src);
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI16x8GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(dst, kScratchDoubleReg, src2);
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    // Unsigned greater-or-equal: min(src1, src2) == src2.
    case kSSEI16x8GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminuw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8RoundingAverageU: {
      __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8Abs: {
      __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I16x8BitMask: {
      // Narrow 16-bit lanes to bytes (packsswb saturates, preserving sign),
      // take the byte sign mask, then shift out the 8 low bits that came
      // from tmp's original (unrelated) low half.
      Register dst = i.OutputRegister();
      XMMRegister tmp = i.TempSimd128Register(0);
      __ Packsswb(tmp, i.InputSimd128Register(0));
      __ Pmovmskb(dst, tmp);
      __ shr(dst, 8);
      break;
    }
    case kIA32I8x16Splat: {
      // Register and memory sources use different I8x16Splat overloads; both
      // need the scratch XMM register.
      if (instr->InputAt(0)->IsRegister()) {
        __ I8x16Splat(i.OutputSimd128Register(), i.InputRegister(0),
                      kScratchDoubleReg);
      } else {
        __ I8x16Splat(i.OutputSimd128Register(), i.InputOperand(0),
                      kScratchDoubleReg);
      }
      break;
    }
    case kIA32I8x16ExtractLaneS: {
      // pextrb zero-extends; sign-extend the byte explicitly for the signed
      // extract.
      Register dst = i.OutputRegister();
      __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
      __ movsx_b(dst, dst);
      break;
    }
    // Lane inserts; the macro handles both register and memory destinations
    // and the SSE4.1 feature scope.
    case kIA32Pinsrb: {
      ASSEMBLE_SIMD_PINSR(pinsrb, SSE4_1);
      break;
    }
    case kIA32Pinsrw: {
      ASSEMBLE_SIMD_PINSR(pinsrw, SSE4_1);
      break;
    }
    case kIA32Pinsrd: {
      ASSEMBLE_SIMD_PINSR(pinsrd, SSE4_1);
      break;
    }
    case kIA32Movlps: {
      // With an output: load into the low 8 bytes of the vector.
      // Without: store the low 8 bytes to memory.
      if (instr->HasOutput()) {
        __ Movlps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.MemoryOperand(2));
      } else {
        size_t index = 0;
        Operand dst = i.MemoryOperand(&index);
        __ Movlps(dst, i.InputSimd128Register(index));
      }
      break;
    }
    case kIA32Movhps: {
      // Same as Movlps but for the high 8 bytes of the vector.
      if (instr->HasOutput()) {
        __ Movhps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.MemoryOperand(2));
      } else {
        size_t index = 0;
        Operand dst = i.MemoryOperand(&index);
        __ Movhps(dst, i.InputSimd128Register(index));
      }
      break;
    }
    case kIA32Pextrb: {
      // Extract either directly to memory (addressing mode present) or to a
      // GP register.
      if (HasAddressingMode(instr)) {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Pextrb(operand, i.InputSimd128Register(index),
                  i.InputUint8(index + 1));
      } else {
        Register dst = i.OutputRegister();
        __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
      }
      break;
    }
    case kIA32Pextrw: {
      if (HasAddressingMode(instr)) {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Pextrw(operand, i.InputSimd128Register(index),
                  i.InputUint8(index + 1));
      } else {
        Register dst = i.OutputRegister();
        __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
      }
      break;
    }
    case kIA32S128Store32Lane: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      uint8_t laneidx = i.InputUint8(index + 1);
      __ S128Store32Lane(operand, i.InputSimd128Register(index), laneidx);
      break;
    }
    case kIA32I8x16SConvertI16x8: {
      // Signed saturating narrow i16x8 -> i8x16.
      __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I8x16Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In-place: psignb with all-ones (-1 per lane) negates each lane.
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignb(dst, kScratchDoubleReg);
      } else {
        // dst = 0 - src.
        __ Pxor(dst, dst);
        __ Psubb(dst, src);
      }
      break;
    }
    // There are no 8-bit SSE shifts; the I8x16Shl/ShrS/ShrU helpers emit
    // multi-instruction sequences and need GP/XMM temporaries.
    case kIA32I8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
      Register tmp = i.TempRegister(0);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
      } else {
        XMMRegister tmp_simd = i.TempSimd128Register(1);
        __ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
                    tmp_simd);
      }
      break;
    }
    case kIA32I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
      } else {
        __ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
                     kScratchDoubleReg, i.TempSimd128Register(1));
      }
      break;
    }
    case kIA32I8x16Add: {
      __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16AddSatS: {
      __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16Sub: {
      __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16SubSatS: {
      __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MinS: {
      __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MaxS: {
      __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16Eq: {
      __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    // Not-equal: equality compare, then invert with an all-ones XOR.
    case kSSEI8x16Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI8x16Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kIA32I8x16GtS: {
      __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    // src1 >= src2 (signed)  <=>  min(src1, src2) == src2.
    case kSSEI8x16GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsb(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsb(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I8x16UConvertI16x8: {
      // Unsigned saturating narrow i16x8 -> i8x16.
      __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kIA32I8x16AddSatU: {
      __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I8x16SubSatU: {
      __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I8x16ShrU: {
      // No 8-bit SSE shift exists; the helper emits a multi-instruction
      // sequence using GP/XMM temporaries.
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
      Register tmp = i.TempRegister(0);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
      } else {
        __ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
                     i.TempSimd128Register(1));
      }

      break;
    }
    case kIA32I8x16MinU: {
      __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MaxU: {
      __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    // Unsigned greater-than: !(max(src1, src2) == src2).
    case kSSEI8x16GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxub(dst, src);
      __ pcmpeqb(dst, src);
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI8x16GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(dst, kScratchDoubleReg, src2);
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    // Unsigned greater-or-equal: min(src1, src2) == src2.
    case kSSEI8x16GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminub(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I8x16RoundingAverageU: {
      __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16Abs: {
      __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I8x16BitMask: {
      // pmovmskb collects each byte lane's sign bit into the low 16 bits.
      __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I8x16Popcnt: {
      __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     kScratchDoubleReg, i.TempSimd128Register(0),
                     i.TempRegister(1));
      break;
    }
    case kIA32S128Const: {
      // Materialize a 128-bit constant: low 64 bits via Move, then insert
      // the two remaining 32-bit words through a GP temp.
      XMMRegister dst = i.OutputSimd128Register();
      Register tmp = i.TempRegister(0);
      uint64_t low_qword = make_uint64(i.InputUint32(1), i.InputUint32(0));
      __ Move(dst, low_qword);
      __ Move(tmp, Immediate(i.InputUint32(2)));
      __ Pinsrd(dst, tmp, 2);
      __ Move(tmp, Immediate(i.InputUint32(3)));
      __ Pinsrd(dst, tmp, 3);
      break;
    }
    case kIA32S128Zero: {
      // xor with itself is the idiomatic zeroing sequence.
      XMMRegister dst = i.OutputSimd128Register();
      __ Pxor(dst, dst);
      break;
    }
    case kIA32S128AllOnes: {
      // pcmpeqd reg, reg sets every bit.
      XMMRegister dst = i.OutputSimd128Register();
      __ Pcmpeqd(dst, dst);
      break;
    }
    case kIA32S128Not: {
      __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 kScratchDoubleReg);
      break;
    }
    case kIA32S128And: {
      __ Pand(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputOperand(1));
      break;
    }
    case kIA32S128Or: {
      __ Por(i.OutputSimd128Register(), i.InputSimd128Register(0),
             i.InputOperand(1));
      break;
    }
    case kIA32S128Xor: {
      __ Pxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputOperand(1));
      break;
    }
    case kIA32S128Select: {
      __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
                    i.InputSimd128Register(1), i.InputSimd128Register(2),
                    kScratchDoubleReg);
      break;
    }
    case kIA32S128AndNot: {
      // The inputs have been inverted by instruction selector, so we can call
      // andnps here without any modifications.
      __ Andnps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
      break;
    }
    case kIA32I8x16Swizzle: {
      // MiscField carries a selector-provided flag forwarded to the helper.
      __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputSimd128Register(1), kScratchDoubleReg,
                      i.TempRegister(0), MiscField::decode(instr->opcode()));
      break;
    }
    case kIA32I8x16Shuffle: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src0 = i.InputOperand(0);
      Register tmp = i.TempRegister(0);
      // Prepare 16 byte aligned buffer for shuffle control mask
      // (esp is saved in tmp and restored after the pushes below).
      __ mov(tmp, esp);
      __ and_(esp, -16);
      if (instr->InputCount() == 5) {  // only one input operand
        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
        // Push the 16-byte mask (4 x 32-bit immediates, high word first so
        // it ends up lowest on the downward-growing stack).
        for (int j = 4; j > 0; j--) {
          uint32_t mask = i.InputUint32(j);
          __ push(Immediate(mask));
        }
        __ Pshufb(dst, Operand(esp, 0));
      } else {  // two input operands
        DCHECK_EQ(6, instr->InputCount());
        __ Movups(kScratchDoubleReg, src0);
        // First pshufb: select lanes taken from src0; out-of-range lane
        // indices (>= 16, i.e. lanes of src1) are rewritten to 0x80 so
        // pshufb zeroes them.
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          uint32_t mask = 0;
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask |= (lane < kSimd128Size ? lane : 0x80) << k;
          }
          __ push(Immediate(mask));
        }
        __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
        Operand src1 = i.InputOperand(1);
        if (!src1.is_reg(dst)) __ Movups(dst, src1);
        // Second pshufb: select lanes taken from src1 (indices >= 16 masked
        // down with 0xF); lanes belonging to src0 are zeroed via 0x80.
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          uint32_t mask = 0;
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
          }
          __ push(Immediate(mask));
        }
        __ Pshufb(dst, Operand(esp, 0));
        // Combine the two half-results.
        __ por(dst, kScratchDoubleReg);
      }
      // Restore the original stack pointer.
      __ mov(esp, tmp);
      break;
    }
    // Load-and-splat / load-and-extend memory operations.
    case kIA32S128Load8Splat: {
      __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
                        kScratchDoubleReg);
      break;
    }
    case kIA32S128Load16Splat: {
      __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
                         kScratchDoubleReg);
      break;
    }
    case kIA32S128Load32Splat: {
      __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load64Splat: {
      __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load8x8S: {
      // Load 8 bytes and sign-extend each to 16 bits.
      __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load8x8U: {
      // Load 8 bytes and zero-extend each to 16 bits.
      __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
3105     case kIA32S128Load16x4S: {
3106       __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3107       break;
3108     }
3109     case kIA32S128Load16x4U: {
3110       __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3111       break;
3112     }
3113     case kIA32S128Load32x2S: {
3114       __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3115       break;
3116     }
3117     case kIA32S128Load32x2U: {
3118       __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3119       break;
3120     }
3121     case kIA32S32x4Rotate: {
3122       XMMRegister dst = i.OutputSimd128Register();
3123       XMMRegister src = i.InputSimd128Register(0);
3124       uint8_t mask = i.InputUint8(1);
3125       if (dst == src) {
3126         // 1-byte shorter encoding than pshufd.
3127         __ Shufps(dst, src, src, mask);
3128       } else {
3129         __ Pshufd(dst, src, mask);
3130       }
3131       break;
3132     }
3133     case kIA32S32x4Swizzle: {
3134       DCHECK_EQ(2, instr->InputCount());
3135       __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputUint8(1));
3136       break;
3137     }
3138     case kIA32S32x4Shuffle: {
3139       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3140       uint8_t shuffle = i.InputUint8(2);
3141       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3142       __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3143       __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3144       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3145       break;
3146     }
3147     case kIA32S16x8Blend:
3148       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3149       break;
3150     case kIA32S16x8HalfShuffle1: {
3151       XMMRegister dst = i.OutputSimd128Register();
3152       __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(1));
3153       __ Pshufhw(dst, dst, i.InputUint8(2));
3154       break;
3155     }
3156     case kIA32S16x8HalfShuffle2: {
3157       XMMRegister dst = i.OutputSimd128Register();
3158       __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputUint8(2));
3159       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3160       __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(2));
3161       __ Pshufhw(dst, dst, i.InputUint8(3));
3162       __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3163       break;
3164     }
3165     case kIA32S8x16Alignr:
3166       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3167       break;
3168     case kIA32S16x8Dup: {
3169       XMMRegister dst = i.OutputSimd128Register();
3170       Operand src = i.InputOperand(0);
3171       uint8_t lane = i.InputUint8(1) & 0x7;
3172       uint8_t lane4 = lane & 0x3;
3173       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3174       if (lane < 4) {
3175         __ Pshuflw(dst, src, half_dup);
3176         __ Punpcklqdq(dst, dst);
3177       } else {
3178         __ Pshufhw(dst, src, half_dup);
3179         __ Punpckhqdq(dst, dst);
3180       }
3181       break;
3182     }
3183     case kIA32S8x16Dup: {
3184       XMMRegister dst = i.OutputSimd128Register();
3185       XMMRegister src = i.InputSimd128Register(0);
3186       uint8_t lane = i.InputUint8(1) & 0xf;
3187       if (CpuFeatures::IsSupported(AVX)) {
3188         CpuFeatureScope avx_scope(tasm(), AVX);
3189         if (lane < 8) {
3190           __ vpunpcklbw(dst, src, src);
3191         } else {
3192           __ vpunpckhbw(dst, src, src);
3193         }
3194       } else {
3195         DCHECK_EQ(dst, src);
3196         if (lane < 8) {
3197           __ punpcklbw(dst, dst);
3198         } else {
3199           __ punpckhbw(dst, dst);
3200         }
3201       }
3202       lane &= 0x7;
3203       uint8_t lane4 = lane & 0x3;
3204       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3205       if (lane < 4) {
3206         __ Pshuflw(dst, dst, half_dup);
3207         __ Punpcklqdq(dst, dst);
3208       } else {
3209         __ Pshufhw(dst, dst, half_dup);
3210         __ Punpckhqdq(dst, dst);
3211       }
3212       break;
3213     }
3214     case kIA32S64x2UnpackHigh:
3215       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3216       break;
3217     case kIA32S32x4UnpackHigh:
3218       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3219       break;
3220     case kIA32S16x8UnpackHigh:
3221       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3222       break;
3223     case kIA32S8x16UnpackHigh:
3224       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3225       break;
3226     case kIA32S64x2UnpackLow:
3227       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3228       break;
3229     case kIA32S32x4UnpackLow:
3230       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3231       break;
3232     case kIA32S16x8UnpackLow:
3233       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3234       break;
3235     case kIA32S8x16UnpackLow:
3236       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3237       break;
3238     case kSSES16x8UnzipHigh: {
3239       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3240       XMMRegister dst = i.OutputSimd128Register();
3241       XMMRegister src2 = dst;
3242       DCHECK_EQ(dst, i.InputSimd128Register(0));
3243       if (instr->InputCount() == 2) {
3244         __ movups(kScratchDoubleReg, i.InputOperand(1));
3245         __ psrld(kScratchDoubleReg, 16);
3246         src2 = kScratchDoubleReg;
3247       }
3248       __ psrld(dst, 16);
3249       __ packusdw(dst, src2);
3250       break;
3251     }
3252     case kAVXS16x8UnzipHigh: {
3253       CpuFeatureScope avx_scope(tasm(), AVX);
3254       XMMRegister dst = i.OutputSimd128Register();
3255       XMMRegister src2 = dst;
3256       if (instr->InputCount() == 2) {
3257         __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3258         src2 = kScratchDoubleReg;
3259       }
3260       __ vpsrld(dst, i.InputSimd128Register(0), 16);
3261       __ vpackusdw(dst, dst, src2);
3262       break;
3263     }
3264     case kSSES16x8UnzipLow: {
3265       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3266       XMMRegister dst = i.OutputSimd128Register();
3267       XMMRegister src2 = dst;
3268       DCHECK_EQ(dst, i.InputSimd128Register(0));
3269       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
3270       if (instr->InputCount() == 2) {
3271         __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3272         src2 = kScratchDoubleReg;
3273       }
3274       __ pblendw(dst, kScratchDoubleReg, 0xaa);
3275       __ packusdw(dst, src2);
3276       break;
3277     }
3278     case kAVXS16x8UnzipLow: {
3279       CpuFeatureScope avx_scope(tasm(), AVX);
3280       XMMRegister dst = i.OutputSimd128Register();
3281       XMMRegister src2 = dst;
3282       __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3283       if (instr->InputCount() == 2) {
3284         __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3285                     0x55);
3286         src2 = kScratchDoubleReg;
3287       }
3288       __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3289       __ vpackusdw(dst, dst, src2);
3290       break;
3291     }
3292     case kSSES8x16UnzipHigh: {
3293       XMMRegister dst = i.OutputSimd128Register();
3294       XMMRegister src2 = dst;
3295       DCHECK_EQ(dst, i.InputSimd128Register(0));
3296       if (instr->InputCount() == 2) {
3297         __ movups(kScratchDoubleReg, i.InputOperand(1));
3298         __ psrlw(kScratchDoubleReg, 8);
3299         src2 = kScratchDoubleReg;
3300       }
3301       __ psrlw(dst, 8);
3302       __ packuswb(dst, src2);
3303       break;
3304     }
3305     case kAVXS8x16UnzipHigh: {
3306       CpuFeatureScope avx_scope(tasm(), AVX);
3307       XMMRegister dst = i.OutputSimd128Register();
3308       XMMRegister src2 = dst;
3309       if (instr->InputCount() == 2) {
3310         __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3311         src2 = kScratchDoubleReg;
3312       }
3313       __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3314       __ vpackuswb(dst, dst, src2);
3315       break;
3316     }
3317     case kSSES8x16UnzipLow: {
3318       XMMRegister dst = i.OutputSimd128Register();
3319       XMMRegister src2 = dst;
3320       DCHECK_EQ(dst, i.InputSimd128Register(0));
3321       if (instr->InputCount() == 2) {
3322         __ movups(kScratchDoubleReg, i.InputOperand(1));
3323         __ psllw(kScratchDoubleReg, 8);
3324         __ psrlw(kScratchDoubleReg, 8);
3325         src2 = kScratchDoubleReg;
3326       }
3327       __ psllw(dst, 8);
3328       __ psrlw(dst, 8);
3329       __ packuswb(dst, src2);
3330       break;
3331     }
3332     case kAVXS8x16UnzipLow: {
3333       CpuFeatureScope avx_scope(tasm(), AVX);
3334       XMMRegister dst = i.OutputSimd128Register();
3335       XMMRegister src2 = dst;
3336       if (instr->InputCount() == 2) {
3337         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3338         __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3339         src2 = kScratchDoubleReg;
3340       }
3341       __ vpsllw(dst, i.InputSimd128Register(0), 8);
3342       __ vpsrlw(dst, dst, 8);
3343       __ vpackuswb(dst, dst, src2);
3344       break;
3345     }
3346     case kSSES8x16TransposeLow: {
3347       XMMRegister dst = i.OutputSimd128Register();
3348       DCHECK_EQ(dst, i.InputSimd128Register(0));
3349       __ psllw(dst, 8);
3350       if (instr->InputCount() == 1) {
3351         __ movups(kScratchDoubleReg, dst);
3352       } else {
3353         DCHECK_EQ(2, instr->InputCount());
3354         __ movups(kScratchDoubleReg, i.InputOperand(1));
3355         __ psllw(kScratchDoubleReg, 8);
3356       }
3357       __ psrlw(dst, 8);
3358       __ orps(dst, kScratchDoubleReg);
3359       break;
3360     }
3361     case kAVXS8x16TransposeLow: {
3362       CpuFeatureScope avx_scope(tasm(), AVX);
3363       XMMRegister dst = i.OutputSimd128Register();
3364       if (instr->InputCount() == 1) {
3365         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
3366         __ vpsrlw(dst, kScratchDoubleReg, 8);
3367       } else {
3368         DCHECK_EQ(2, instr->InputCount());
3369         __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3370         __ vpsllw(dst, i.InputSimd128Register(0), 8);
3371         __ vpsrlw(dst, dst, 8);
3372       }
3373       __ vpor(dst, dst, kScratchDoubleReg);
3374       break;
3375     }
3376     case kSSES8x16TransposeHigh: {
3377       XMMRegister dst = i.OutputSimd128Register();
3378       DCHECK_EQ(dst, i.InputSimd128Register(0));
3379       __ psrlw(dst, 8);
3380       if (instr->InputCount() == 1) {
3381         __ movups(kScratchDoubleReg, dst);
3382       } else {
3383         DCHECK_EQ(2, instr->InputCount());
3384         __ movups(kScratchDoubleReg, i.InputOperand(1));
3385         __ psrlw(kScratchDoubleReg, 8);
3386       }
3387       __ psllw(kScratchDoubleReg, 8);
3388       __ orps(dst, kScratchDoubleReg);
3389       break;
3390     }
3391     case kAVXS8x16TransposeHigh: {
3392       CpuFeatureScope avx_scope(tasm(), AVX);
3393       XMMRegister dst = i.OutputSimd128Register();
3394       if (instr->InputCount() == 1) {
3395         __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3396         __ vpsllw(kScratchDoubleReg, dst, 8);
3397       } else {
3398         DCHECK_EQ(2, instr->InputCount());
3399         __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3400         __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3401         __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3402       }
3403       __ vpor(dst, dst, kScratchDoubleReg);
3404       break;
3405     }
3406     case kSSES8x8Reverse:
3407     case kSSES8x4Reverse:
3408     case kSSES8x2Reverse: {
3409       DCHECK_EQ(1, instr->InputCount());
3410       XMMRegister dst = i.OutputSimd128Register();
3411       DCHECK_EQ(dst, i.InputSimd128Register(0));
3412       if (arch_opcode != kSSES8x2Reverse) {
3413         // First shuffle words into position.
3414         int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
3415         __ pshuflw(dst, dst, shuffle_mask);
3416         __ pshufhw(dst, dst, shuffle_mask);
3417       }
3418       __ movaps(kScratchDoubleReg, dst);
3419       __ psrlw(kScratchDoubleReg, 8);
3420       __ psllw(dst, 8);
3421       __ orps(dst, kScratchDoubleReg);
3422       break;
3423     }
3424     case kAVXS8x2Reverse:
3425     case kAVXS8x4Reverse:
3426     case kAVXS8x8Reverse: {
3427       DCHECK_EQ(1, instr->InputCount());
3428       CpuFeatureScope avx_scope(tasm(), AVX);
3429       XMMRegister dst = i.OutputSimd128Register();
3430       XMMRegister src = dst;
3431       if (arch_opcode != kAVXS8x2Reverse) {
3432         // First shuffle words into position.
3433         int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3434         __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
3435         __ vpshufhw(dst, dst, shuffle_mask);
3436       } else {
3437         src = i.InputSimd128Register(0);
3438       }
3439       // Reverse each 16 bit lane.
3440       __ vpsrlw(kScratchDoubleReg, src, 8);
3441       __ vpsllw(dst, src, 8);
3442       __ vpor(dst, dst, kScratchDoubleReg);
3443       break;
3444     }
3445     case kIA32S128AnyTrue: {
3446       Register dst = i.OutputRegister();
3447       XMMRegister src = i.InputSimd128Register(0);
3448       Register tmp = i.TempRegister(0);
3449       __ xor_(tmp, tmp);
3450       __ mov(dst, Immediate(1));
3451       __ Ptest(src, src);
3452       __ cmov(zero, dst, tmp);
3453       break;
3454     }
3455     // Need to split up all the different lane structures because the
3456     // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
3457     // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
3458     // respectively.
3459     case kIA32I64x2AllTrue:
3460       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
3461       break;
3462     case kIA32I32x4AllTrue:
3463       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3464       break;
3465     case kIA32I16x8AllTrue:
3466       ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
3467       break;
3468     case kIA32I8x16AllTrue: {
3469       ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
3470       break;
3471     }
3472     case kIA32Pblendvb: {
3473       __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3474                   i.InputSimd128Register(1), i.InputSimd128Register(2));
3475       break;
3476     }
3477     case kIA32I32x4TruncF64x2UZero: {
3478       __ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
3479                               i.InputSimd128Register(0), i.TempRegister(0),
3480                               kScratchDoubleReg);
3481       break;
3482     }
3483     case kIA32I32x4TruncF32x4U: {
3484       __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
3485                           i.TempRegister(0), kScratchDoubleReg);
3486       break;
3487     }
3488     case kIA32Cvttps2dq: {
3489       __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3490       break;
3491     }
3492     case kIA32Cvttpd2dq: {
3493       __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3494       break;
3495     }
3496     case kIA32Word32AtomicPairLoad: {
3497       __ movq(kScratchDoubleReg, i.MemoryOperand());
3498       __ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
3499       __ Pextrd(i.OutputRegister(1), kScratchDoubleReg, 1);
3500       break;
3501     }
3502     case kIA32Word32ReleasePairStore: {
3503       __ push(ebx);
3504       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(1));
3505       __ push(ebx);
3506       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3507       __ push(ebx);
3508       frame_access_state()->IncreaseSPDelta(3);
3509       __ movq(kScratchDoubleReg, MemOperand(esp, 0));
3510       __ pop(ebx);
3511       __ pop(ebx);
3512       __ pop(ebx);
3513       frame_access_state()->IncreaseSPDelta(-3);
3514       __ movq(i.MemoryOperand(2), kScratchDoubleReg);
3515       break;
3516     }
3517     case kIA32Word32SeqCstPairStore: {
3518       Label store;
3519       __ bind(&store);
3520       __ mov(eax, i.MemoryOperand(2));
3521       __ mov(edx, i.NextMemoryOperand(2));
3522       __ push(ebx);
3523       frame_access_state()->IncreaseSPDelta(1);
3524       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3525       __ lock();
3526       __ cmpxchg8b(i.MemoryOperand(2));
3527       __ pop(ebx);
3528       frame_access_state()->IncreaseSPDelta(-1);
3529       __ j(not_equal, &store);
3530       break;
3531     }
3532     case kAtomicExchangeInt8: {
3533       __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3534       __ movsx_b(i.InputRegister(0), i.InputRegister(0));
3535       break;
3536     }
3537     case kAtomicExchangeUint8: {
3538       __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3539       __ movzx_b(i.InputRegister(0), i.InputRegister(0));
3540       break;
3541     }
3542     case kAtomicExchangeInt16: {
3543       __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3544       __ movsx_w(i.InputRegister(0), i.InputRegister(0));
3545       break;
3546     }
3547     case kAtomicExchangeUint16: {
3548       __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3549       __ movzx_w(i.InputRegister(0), i.InputRegister(0));
3550       break;
3551     }
3552     case kAtomicExchangeWord32: {
3553       __ xchg(i.InputRegister(0), i.MemoryOperand(1));
3554       break;
3555     }
3556     case kIA32Word32AtomicPairExchange: {
3557       DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3558       Label exchange;
3559       __ bind(&exchange);
3560       __ mov(eax, i.MemoryOperand(2));
3561       __ mov(edx, i.NextMemoryOperand(2));
3562       __ push(ebx);
3563       frame_access_state()->IncreaseSPDelta(1);
3564       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3565       __ lock();
3566       __ cmpxchg8b(i.MemoryOperand(2));
3567       __ pop(ebx);
3568       frame_access_state()->IncreaseSPDelta(-1);
3569       __ j(not_equal, &exchange);
3570       break;
3571     }
3572     case kAtomicCompareExchangeInt8: {
3573       __ lock();
3574       __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3575       __ movsx_b(eax, eax);
3576       break;
3577     }
3578     case kAtomicCompareExchangeUint8: {
3579       __ lock();
3580       __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3581       __ movzx_b(eax, eax);
3582       break;
3583     }
3584     case kAtomicCompareExchangeInt16: {
3585       __ lock();
3586       __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3587       __ movsx_w(eax, eax);
3588       break;
3589     }
3590     case kAtomicCompareExchangeUint16: {
3591       __ lock();
3592       __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3593       __ movzx_w(eax, eax);
3594       break;
3595     }
3596     case kAtomicCompareExchangeWord32: {
3597       __ lock();
3598       __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3599       break;
3600     }
3601     case kIA32Word32AtomicPairCompareExchange: {
3602       __ push(ebx);
3603       frame_access_state()->IncreaseSPDelta(1);
3604       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
3605       __ lock();
3606       __ cmpxchg8b(i.MemoryOperand(4));
3607       __ pop(ebx);
3608       frame_access_state()->IncreaseSPDelta(-1);
3609       break;
3610     }
3611 #define ATOMIC_BINOP_CASE(op, inst)                \
3612   case kAtomic##op##Int8: {                        \
3613     ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3614     __ movsx_b(eax, eax);                          \
3615     break;                                         \
3616   }                                                \
3617   case kAtomic##op##Uint8: {                       \
3618     ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3619     __ movzx_b(eax, eax);                          \
3620     break;                                         \
3621   }                                                \
3622   case kAtomic##op##Int16: {                       \
3623     ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3624     __ movsx_w(eax, eax);                          \
3625     break;                                         \
3626   }                                                \
3627   case kAtomic##op##Uint16: {                      \
3628     ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3629     __ movzx_w(eax, eax);                          \
3630     break;                                         \
3631   }                                                \
3632   case kAtomic##op##Word32: {                      \
3633     ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg);     \
3634     break;                                         \
3635   }
3636       ATOMIC_BINOP_CASE(Add, add)
3637       ATOMIC_BINOP_CASE(Sub, sub)
3638       ATOMIC_BINOP_CASE(And, and_)
3639       ATOMIC_BINOP_CASE(Or, or_)
3640       ATOMIC_BINOP_CASE(Xor, xor_)
3641 #undef ATOMIC_BINOP_CASE
3642 #define ATOMIC_BINOP_CASE(op, instr1, instr2)         \
3643   case kIA32Word32AtomicPair##op: {                   \
3644     DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
3645     ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)          \
3646     break;                                            \
3647   }
3648       ATOMIC_BINOP_CASE(Add, add, adc)
3649       ATOMIC_BINOP_CASE(And, and_, and_)
3650       ATOMIC_BINOP_CASE(Or, or_, or_)
3651       ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3652 #undef ATOMIC_BINOP_CASE
3653     case kIA32Word32AtomicPairSub: {
3654       DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3655       Label binop;
3656       __ bind(&binop);
3657       // Move memory operand into edx:eax
3658       __ mov(eax, i.MemoryOperand(2));
3659       __ mov(edx, i.NextMemoryOperand(2));
3660       // Save input registers temporarily on the stack.
3661       __ push(ebx);
3662       frame_access_state()->IncreaseSPDelta(1);
3663       i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3664       __ push(i.InputRegister(1));
3665       // Negate input in place
3666       __ neg(ebx);
3667       __ adc(i.InputRegister(1), 0);
3668       __ neg(i.InputRegister(1));
3669       // Add memory operand, negated input.
3670       __ add(ebx, eax);
3671       __ adc(i.InputRegister(1), edx);
3672       __ lock();
3673       __ cmpxchg8b(i.MemoryOperand(2));
3674       // Restore input registers
3675       __ pop(i.InputRegister(1));
3676       __ pop(ebx);
3677       frame_access_state()->IncreaseSPDelta(-1);
3678       __ j(not_equal, &binop);
3679       break;
3680     }
3681     case kAtomicLoadInt8:
3682     case kAtomicLoadUint8:
3683     case kAtomicLoadInt16:
3684     case kAtomicLoadUint16:
3685     case kAtomicLoadWord32:
3686     case kAtomicStoreWord8:
3687     case kAtomicStoreWord16:
3688     case kAtomicStoreWord32:
3689       UNREACHABLE();  // Won't be generated by instruction selector.
3690   }
3691   return kSuccess;
3692 }
3693 
FlagsConditionToCondition(FlagsCondition condition)3694 static Condition FlagsConditionToCondition(FlagsCondition condition) {
3695   switch (condition) {
3696     case kUnorderedEqual:
3697     case kEqual:
3698       return equal;
3699     case kUnorderedNotEqual:
3700     case kNotEqual:
3701       return not_equal;
3702     case kSignedLessThan:
3703       return less;
3704     case kSignedGreaterThanOrEqual:
3705       return greater_equal;
3706     case kSignedLessThanOrEqual:
3707       return less_equal;
3708     case kSignedGreaterThan:
3709       return greater;
3710     case kUnsignedLessThan:
3711       return below;
3712     case kUnsignedGreaterThanOrEqual:
3713       return above_equal;
3714     case kUnsignedLessThanOrEqual:
3715       return below_equal;
3716     case kUnsignedGreaterThan:
3717       return above;
3718     case kOverflow:
3719       return overflow;
3720     case kNotOverflow:
3721       return no_overflow;
3722     default:
3723       UNREACHABLE();
3724   }
3725 }
3726 
3727 // Assembles a branch after an instruction.
AssembleArchBranch(Instruction * instr,BranchInfo * branch)3728 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3729   Label::Distance flabel_distance =
3730       branch->fallthru ? Label::kNear : Label::kFar;
3731   Label* tlabel = branch->true_label;
3732   Label* flabel = branch->false_label;
3733   if (branch->condition == kUnorderedEqual) {
3734     __ j(parity_even, flabel, flabel_distance);
3735   } else if (branch->condition == kUnorderedNotEqual) {
3736     __ j(parity_even, tlabel);
3737   }
3738   __ j(FlagsConditionToCondition(branch->condition), tlabel);
3739 
3740   // Add a jump if not falling through to the next block.
3741   if (!branch->fallthru) __ jmp(flabel);
3742 }
3743 
// Assembles a branch that may deoptimize. On IA-32 no extra work (e.g. a
// poisoning sequence) is required, so this is identical to a regular branch.
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}
3748 
// Emits an unconditional jump to the block identified by |target|, even if
// that block would otherwise be reached by fall-through.
void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
    RpoNumber target) {
  __ jmp(GetLabel(target));
}
3753 
3754 #if V8_ENABLE_WEBASSEMBLY
// Assembles a conditional WebAssembly trap. The trapping sequence is emitted
// out of line so the common non-trapping path stays short and branch-only.
void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      IA32OperandConverter i(gen_, instr_);
      // The trap id is encoded as the last input of the instruction.
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        __ PrepareCallCFunction(0, esi);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        // Tear down the wasm frame and pop the incoming parameter slots
        // before returning to the caller.
        __ LeaveFrame(StackFrame::WASM);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        size_t pop_size =
            call_descriptor->ParameterSlotCount() * kSystemPointerSize;
        // Use ecx as a scratch register, we return anyways immediately.
        __ Ret(static_cast<int>(pop_size), ecx);
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched when the code
        // is added to the native module and copied into wasm code space.
        __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        // Record a safepoint (with an empty reference map) for the stub call.
        ReferenceMap* reference_map =
            gen_->zone()->New<ReferenceMap>(gen_->zone());
        gen_->RecordSafepoint(reference_map);
        // The trap stub never returns.
        __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = zone()->New<OutOfLineTrap>(this, instr);
  Label* tlabel = ool->entry();
  Label end;
  // Parity (NaN) handling: for kUnorderedEqual an unordered result means the
  // comparison is false, so skip the trap; for kUnorderedNotEqual it means
  // the comparison is true, so take the trap.
  if (condition == kUnorderedEqual) {
    __ j(parity_even, &end, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(condition), tlabel);
  __ bind(&end);
}
3810 #endif  // V8_ENABLE_WEBASSEMBLY
3811 
3812 // Assembles boolean materializations after an instruction.
AssembleArchBoolean(Instruction * instr,FlagsCondition condition)3813 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3814                                         FlagsCondition condition) {
3815   IA32OperandConverter i(this, instr);
3816   Label done;
3817 
3818   // Materialize a full 32-bit 1 or 0 value. The result register is always the
3819   // last output of the instruction.
3820   Label check;
3821   DCHECK_NE(0u, instr->OutputCount());
3822   Register reg = i.OutputRegister(instr->OutputCount() - 1);
3823   if (condition == kUnorderedEqual) {
3824     __ j(parity_odd, &check, Label::kNear);
3825     __ Move(reg, Immediate(0));
3826     __ jmp(&done, Label::kNear);
3827   } else if (condition == kUnorderedNotEqual) {
3828     __ j(parity_odd, &check, Label::kNear);
3829     __ mov(reg, Immediate(1));
3830     __ jmp(&done, Label::kNear);
3831   }
3832   Condition cc = FlagsConditionToCondition(condition);
3833 
3834   __ bind(&check);
3835   if (reg.is_byte_register()) {
3836     // setcc for byte registers (al, bl, cl, dl).
3837     __ setcc(cc, reg);
3838     __ movzx_b(reg, reg);
3839   } else {
3840     // Emit a branch to set a register to either 1 or 0.
3841     Label set;
3842     __ j(cc, &set, Label::kNear);
3843     __ Move(reg, Immediate(0));
3844     __ jmp(&done, Label::kNear);
3845     __ bind(&set);
3846     __ mov(reg, Immediate(1));
3847   }
3848   __ bind(&done);
3849 }
3850 
AssembleArchBinarySearchSwitch(Instruction * instr)3851 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3852   IA32OperandConverter i(this, instr);
3853   Register input = i.InputRegister(0);
3854   std::vector<std::pair<int32_t, Label*>> cases;
3855   for (size_t index = 2; index < instr->InputCount(); index += 2) {
3856     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3857   }
3858   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3859                                       cases.data() + cases.size());
3860 }
3861 
AssembleArchTableSwitch(Instruction * instr)3862 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3863   IA32OperandConverter i(this, instr);
3864   Register input = i.InputRegister(0);
3865   size_t const case_count = instr->InputCount() - 2;
3866   Label** cases = zone()->NewArray<Label*>(case_count);
3867   for (size_t index = 0; index < case_count; ++index) {
3868     cases[index] = GetLabel(i.InputRpo(index + 2));
3869   }
3870   Label* const table = AddJumpTable(cases, case_count);
3871   __ cmp(input, Immediate(case_count));
3872   __ j(above_equal, GetLabel(i.InputRpo(1)));
3873   __ jmp(Operand::JumpTable(input, times_system_pointer_size, table));
3874 }
3875 
// Flag-based select (conditional move of one of two inputs) is not
// implemented for the IA-32 backend.
void CodeGenerator::AssembleArchSelect(Instruction* instr,
                                       FlagsCondition condition) {
  UNIMPLEMENTED();
}
3880 
3881 // The calling convention for JSFunctions on IA32 passes arguments on the
3882 // stack and the JSFunction and context in EDI and ESI, respectively, thus
3883 // the steps of the call look as follows:
3884 
3885 // --{ before the call instruction }--------------------------------------------
3886 //                                                         |  caller frame |
3887 //                                                         ^ esp           ^ ebp
3888 
3889 // --{ push arguments and setup ESI, EDI }--------------------------------------
3890 //                                       | args + receiver |  caller frame |
3891 //                                       ^ esp                             ^ ebp
3892 //                 [edi = JSFunction, esi = context]
3893 
3894 // --{ call [edi + kCodeEntryOffset] }------------------------------------------
3895 //                                 | RET | args + receiver |  caller frame |
3896 //                                 ^ esp                                   ^ ebp
3897 
3898 // =={ prologue of called function }============================================
3899 // --{ push ebp }---------------------------------------------------------------
3900 //                            | FP | RET | args + receiver |  caller frame |
3901 //                            ^ esp                                        ^ ebp
3902 
3903 // --{ mov ebp, esp }-----------------------------------------------------------
3904 //                            | FP | RET | args + receiver |  caller frame |
3905 //                            ^ ebp,esp
3906 
3907 // --{ push esi }---------------------------------------------------------------
3908 //                      | CTX | FP | RET | args + receiver |  caller frame |
3909 //                      ^esp  ^ ebp
3910 
3911 // --{ push edi }---------------------------------------------------------------
3912 //                | FNC | CTX | FP | RET | args + receiver |  caller frame |
3913 //                ^esp        ^ ebp
3914 
3915 // --{ subi esp, #N }-----------------------------------------------------------
3916 // | callee frame | FNC | CTX | FP | RET | args + receiver |  caller frame |
3917 // ^esp                       ^ ebp
3918 
3919 // =={ body of called function }================================================
3920 
3921 // =={ epilogue of called function }============================================
3922 // --{ mov esp, ebp }-----------------------------------------------------------
3923 //                            | FP | RET | args + receiver |  caller frame |
3924 //                            ^ esp,ebp
3925 
3926 // --{ pop ebp }-----------------------------------------------------------
3927 // |                               | RET | args + receiver |  caller frame |
3928 //                                 ^ esp                                   ^ ebp
3929 
3930 // --{ ret #A+1 }-----------------------------------------------------------
3931 // |                                                       |  caller frame |
3932 //                                                         ^ esp           ^ ebp
3933 
// Runtime function calls are accomplished by doing a stub call to the
// CEntry (a real code object). On IA32 this passes arguments on the
// stack, the number of arguments in EAX, the address of the runtime function
// in EBX, and the context in ESI.
3938 
3939 // --{ before the call instruction }--------------------------------------------
3940 //                                                         |  caller frame |
3941 //                                                         ^ esp           ^ ebp
3942 
3943 // --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
3944 //                                       | args + receiver |  caller frame |
3945 //                                       ^ esp                             ^ ebp
3946 //              [eax = #args, ebx = runtime function, esi = context]
3947 
3948 // --{ call #CEntry }-----------------------------------------------------------
3949 //                                 | RET | args + receiver |  caller frame |
3950 //                                 ^ esp                                   ^ ebp
3951 
3952 // =={ body of runtime function }===============================================
3953 
3954 // --{ runtime returns }--------------------------------------------------------
3955 //                                                         |  caller frame |
3956 //                                                         ^ esp           ^ ebp
3957 
3958 // Other custom linkages (e.g. for calling directly into and out of C++) may
3959 // need to save callee-saved registers on the stack, which is done in the
3960 // function prologue of generated code.
3961 
3962 // --{ before the call instruction }--------------------------------------------
3963 //                                                         |  caller frame |
3964 //                                                         ^ esp           ^ ebp
3965 
3966 // --{ set up arguments in registers on stack }---------------------------------
3967 //                                                  | args |  caller frame |
3968 //                                                  ^ esp                  ^ ebp
3969 //                  [r0 = arg0, r1 = arg1, ...]
3970 
3971 // --{ call code }--------------------------------------------------------------
3972 //                                            | RET | args |  caller frame |
3973 //                                            ^ esp                        ^ ebp
3974 
3975 // =={ prologue of called function }============================================
3976 // --{ push ebp }---------------------------------------------------------------
3977 //                                       | FP | RET | args |  caller frame |
3978 //                                       ^ esp                             ^ ebp
3979 
3980 // --{ mov ebp, esp }-----------------------------------------------------------
3981 //                                       | FP | RET | args |  caller frame |
3982 //                                       ^ ebp,esp
3983 
3984 // --{ save registers }---------------------------------------------------------
3985 //                                | regs | FP | RET | args |  caller frame |
3986 //                                ^ esp  ^ ebp
3987 
3988 // --{ subi esp, #N }-----------------------------------------------------------
3989 //                 | callee frame | regs | FP | RET | args |  caller frame |
3990 //                 ^esp                  ^ ebp
3991 
3992 // =={ body of called function }================================================
3993 
3994 // =={ epilogue of called function }============================================
3995 // --{ restore registers }------------------------------------------------------
3996 //                                | regs | FP | RET | args |  caller frame |
3997 //                                ^ esp  ^ ebp
3998 
3999 // --{ mov esp, ebp }-----------------------------------------------------------
4000 //                                       | FP | RET | args |  caller frame |
4001 //                                       ^ esp,ebp
4002 
4003 // --{ pop ebp }----------------------------------------------------------------
4004 //                                            | RET | args |  caller frame |
4005 //                                            ^ esp                        ^ ebp
4006 
FinishFrame(Frame * frame)4007 void CodeGenerator::FinishFrame(Frame* frame) {
4008   auto call_descriptor = linkage()->GetIncomingDescriptor();
4009   const RegList saves = call_descriptor->CalleeSavedRegisters();
4010   if (!saves.is_empty()) {  // Save callee-saved registers.
4011     DCHECK(!info()->is_osr());
4012     frame->AllocateSavedCalleeRegisterSlots(saves.Count());
4013   }
4014 }
4015 
// Builds the stack frame for the current code object: emits the appropriate
// prologue for the calling convention, an (optional) wasm stack-overflow
// check, allocates the spill slots, and pushes callee-saved registers. The
// resulting layouts are the ones pictured in the diagrams above.
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
      // Plain C frame: push ebp; ebp = esp.
      __ push(ebp);
      __ mov(ebp, esp);
#if V8_ENABLE_WEBASSEMBLY
      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
        __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
        // Reserve stack space for saving the c_entry_fp later.
        __ AllocateStackSpace(kSystemPointerSize);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
#if V8_ENABLE_WEBASSEMBLY
      if (call_descriptor->IsWasmFunctionCall() ||
          call_descriptor->IsWasmImportWrapper() ||
          call_descriptor->IsWasmCapiFunction()) {
        // Wasm frames keep the instance in a fixed frame slot.
        __ push(kWasmInstanceRegister);
      }
      if (call_descriptor->IsWasmCapiFunction()) {
        // Reserve space for saving the PC later.
        __ AllocateStackSpace(kSystemPointerSize);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
    }
  }

  // Slots beyond the fixed frame part that still need to be allocated here.
  int required_slots =
      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    required_slots -= osr_helper()->UnoptimizedFrameSlots();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (required_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
#if V8_ENABLE_WEBASSEMBLY
    if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
        // esi serves as a scratch register here; its value is preserved by
        // the surrounding push/pop.
        Register scratch = esi;
        __ push(scratch);
        __ mov(scratch,
               FieldOperand(kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ mov(scratch, Operand(scratch, 0));
        __ add(scratch, Immediate(required_slots * kSystemPointerSize));
        __ cmp(esp, scratch);
        __ pop(scratch);
        __ j(above_equal, &done, Label::kNear);
      }

      __ wasm_call(wasm::WasmCode::kWasmStackOverflow,
                   RelocInfo::WASM_STUB_CALL);
      // The call does not return, hence we can ignore any references and just
      // define an empty safepoint.
      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
      RecordSafepoint(reference_map);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    // Skip callee-saved and return slots, which are created below.
    required_slots -= saves.Count();
    required_slots -= frame()->GetReturnSlotCount();
    if (required_slots > 0) {
      __ AllocateStackSpace(required_slots * kSystemPointerSize);
    }
  }

  if (!saves.is_empty()) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    for (Register reg : base::Reversed(saves)) {
      __ push(reg);
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
  }
}
4121 
// Emits the function epilogue: restores callee-saved registers, deconstructs
// the frame, and returns while popping the parameter slots plus
// {additional_pop_count} extra slots (only one of the two can be non-zero).
void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  // Restore registers.
  if (!saves.is_empty()) {
    // Return slots sit below the callee-saved registers; skip over them
    // before popping.
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ add(esp, Immediate(returns * kSystemPointerSize));
    }
    for (Register reg : saves) {
      __ pop(reg);
    }
  }

  IA32OperandConverter g(this, nullptr);
  int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());

  // {additional_pop_count} is only greater than zero if {parameter_slots = 0}.
  // Check RawMachineAssembler::PopAndReturn.
  if (parameter_slots != 0) {
    if (additional_pop_count->IsImmediate()) {
      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
    } else if (FLAG_debug_code) {
      __ cmp(g.ToRegister(additional_pop_count), Immediate(0));
      __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
    }
  }

  Register argc_reg = ecx;
  // Functions with JS linkage have at least one parameter (the receiver).
  // If {parameter_slots} == 0, it means it is a builtin with
  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
  // itself.

  const bool drop_jsargs = parameter_slots != 0 &&
                           frame_access_state()->has_frame() &&
                           call_descriptor->IsJSFunctionCall();
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now if they always have the same
    // number of return args.
    if (additional_pop_count->IsImmediate() &&
        g.ToConstant(additional_pop_count).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        // A canonical return site already exists; jump to it and stop.
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
      }
    }
    if (drop_jsargs) {
      // Get the actual argument count.
      __ mov(argc_reg, Operand(ebp, StandardFrameConstants::kArgCOffset));
      DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
    }
    AssembleDeconstructFrame();
  }

  if (drop_jsargs) {
    // We must pop all arguments from the stack (including the receiver).
    // The number of arguments without the receiver is
    // max(argc_reg, parameter_slots-1), and the receiver is added in
    // DropArguments().
    Label mismatch_return;
    Register scratch_reg = edx;
    DCHECK_NE(argc_reg, scratch_reg);
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
    __ cmp(argc_reg, Immediate(parameter_slots));
    __ j(greater, &mismatch_return, Label::kNear);
    // Fast path: actual arguments do not exceed the formal parameter count,
    // so a fixed-size return suffices.
    __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
    __ bind(&mismatch_return);
    __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
                     TurboAssembler::kCountIncludesReceiver);
    // We use a return instead of a jump for better return address prediction.
    __ Ret();
  } else if (additional_pop_count->IsImmediate()) {
    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
    size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
    if (is_uint16(pop_size)) {
      // Avoid the additional scratch register, it might clobber the
      // CalleeSavedRegisters.
      __ ret(static_cast<int>(pop_size));
    } else {
      Register scratch_reg = ecx;
      DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
      CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
      __ Ret(static_cast<int>(pop_size), scratch_reg);
    }
  } else {
    // Pop count is dynamic (in a register): slide the return address past the
    // popped slots by hand.
    Register pop_reg = g.ToRegister(additional_pop_count);
    Register scratch_reg = pop_reg == ecx ? edx : ecx;
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
    int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
    __ PopReturnAddressTo(scratch_reg);
    __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size,
                        static_cast<int>(pop_size)));
    __ PushReturnAddressFrom(scratch_reg);
    __ Ret();
  }
}
4226 
FinishCode()4227 void CodeGenerator::FinishCode() {}
4228 
// IA32 requires no preparation before emitting deoptimization exits.
void CodeGenerator::PrepareForDeoptimizationExits(
    ZoneDeque<DeoptimizationExit*>* exits) {}
4231 
// Emits a move from {source} to {destination}. Dispatches on the operand
// kinds (register/stack/constant, GP vs FP) and, for FP values, on the
// machine representation to pick the right load/store width.
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        // Movaps copies the whole 128-bit register, which covers float32,
        // float64 and simd128 alike.
        __ Movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ mov(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Store exactly as many bytes as the representation occupies.
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ mov(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // push/pop copies memory-to-memory without needing a scratch GP
        // register.
        __ push(src);
        __ pop(dst);
      } else {
        // FP stack-to-stack moves go through the scratch XMM register.
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, src);
          __ Movss(dst, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        Register dst = g.ToRegister(destination);
        if (src.type() == Constant::kHeapObject) {
          __ Move(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, src.ToFloat32AsInt());
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        __ Move(dst, g.ToImmediate(source));
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ Move(dst, Immediate(src.ToFloat32AsInt()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          // A 64-bit constant is written as two 32-bit halves (low word
          // first), since IA32 has no 64-bit immediate store.
          uint64_t constant_value = src.ToFloat64().AsUint64();
          uint32_t lower = static_cast<uint32_t>(constant_value);
          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
          Operand dst0 = dst;
          Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
          __ Move(dst0, Immediate(lower));
          __ Move(dst1, Immediate(upper));
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
4355 
// Emits code that exchanges the contents of {source} and {destination} in
// place, using the stack or the scratch XMM register as the temporary.
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.  Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        // Use the stack as the temporary so no scratch GP register is needed.
        __ push(src);
        __ mov(src, dst);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movaps(kScratchDoubleReg, src);
        __ Movaps(src, dst);
        __ Movaps(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        __ push(src);
        // The push moved esp; record the SP delta so that the stack-slot
        // operand below resolves to the correct address.
        frame_access_state()->IncreaseSPDelta(1);
        Operand dst = g.ToOperand(destination);
        __ mov(src, dst);
        frame_access_state()->IncreaseSPDelta(-1);
        // Re-materialize the operand with the SP delta restored; the pop
        // below addresses the slot after esp has been adjusted.
        dst = g.ToOperand(destination);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Save the old slot value in the scratch XMM register, store the
        // register, then move the saved value into the register.
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, dst);
          __ Movss(dst, src);
          __ Movaps(src, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, dst);
          __ Movsd(dst, src);
          __ Movaps(src, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, dst);
          __ Movups(dst, src);
          __ Movups(src, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      if (source->IsStackSlot()) {
        // Push both slots, then pop them back crosswise. The SP delta is
        // tracked so the re-materialized operands stay correct while the
        // stack is temporarily deeper.
        Operand dst1 = g.ToOperand(destination);
        __ push(dst1);
        frame_access_state()->IncreaseSPDelta(1);
        Operand src1 = g.ToOperand(source);
        __ push(src1);
        Operand dst2 = g.ToOperand(destination);
        __ pop(dst2);
        frame_access_state()->IncreaseSPDelta(-1);
        Operand src2 = g.ToOperand(source);
        __ pop(src2);
      } else {
        DCHECK(source->IsFPStackSlot());
        Operand src0 = g.ToOperand(source);
        Operand dst0 = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Wider representations copy the remaining 32-bit words with
        // push/pop pairs at increasing slot offsets.
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ Movss(src0, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ Movsd(src0, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ push(g.ToOperand(source, 2 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
          __ push(g.ToOperand(source, 3 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
          __ Movups(src0, kScratchDoubleReg);
        }
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}
4462 
AssembleJumpTable(Label ** targets,size_t target_count)4463 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4464   for (size_t index = 0; index < target_count; ++index) {
4465     __ dd(targets[index]);
4466   }
4467 }
4468 
4469 #undef __
4470 #undef kScratchDoubleReg
4471 #undef ASSEMBLE_COMPARE
4472 #undef ASSEMBLE_IEEE754_BINOP
4473 #undef ASSEMBLE_IEEE754_UNOP
4474 #undef ASSEMBLE_BINOP
4475 #undef ASSEMBLE_ATOMIC_BINOP
4476 #undef ASSEMBLE_I64ATOMIC_BINOP
4477 #undef ASSEMBLE_MOVX
4478 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4479 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4480 #undef ASSEMBLE_SIMD_ALL_TRUE
4481 #undef ASSEMBLE_SIMD_SHIFT
4482 #undef ASSEMBLE_SIMD_PINSR
4483 
4484 }  // namespace compiler
4485 }  // namespace internal
4486 }  // namespace v8
4487