// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/execution/frame-constants.h"
#include "src/execution/frames.h"
#include "src/heap/memory-chunk.h"
#include "src/objects/smi.h"

#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
#endif  // V8_ENABLE_WEBASSEMBLY

namespace v8 {
namespace internal {
namespace compiler {

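// Shorthand: emit code through the TurboAssembler returned by tasm().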
#define __ tasm()->

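// xmm0 is reserved as a fixed scratch double register by this backend.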
#define kScratchDoubleReg xmm0

// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
 public:
  IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    if (op->IsRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToRegister(op));
    } else if (op->IsFPRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToDoubleRegister(op));
    }
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return Operand(offset.from_stack_pointer() ? esp : ebp,
                   offset.offset() + extra);
  }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
#if V8_ENABLE_WEBASSEMBLY
    if (constant.type() == Constant::kInt32 &&
        RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(static_cast<Address>(constant.ToInt32()),
                       constant.rmode());
    }
#endif  // V8_ENABLE_WEBASSEMBLY
    switch (constant.type()) {
      case Constant::kInt32:
        return Immediate(constant.ToInt32());
      case Constant::kFloat32:
        return Immediate::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Immediate::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Immediate(constant.ToExternalReference());
      case Constant::kHeapObject:
        return Immediate(constant.ToHeapObject());
      case Constant::kCompressedHeapObject:
        break;
      case Constant::kDelayedStringConstant:
        return Immediate::EmbeddedStringConstant(
            constant.ToDelayedStringConstant());
      case Constant::kInt64:
        break;
      case Constant::kRpoNumber:
        return Immediate::CodeRelativeOffset(ToLabel(operand));
    }
    UNREACHABLE();
  }

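  // Returns the current input offset and then advances it, so that the
  // addressing-mode decoders below can consume instruction inputs in order.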
  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

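  // Derives the scale factor from the distance between {mode} and the first
  // mode {one} of its group, relying on times_1..times_8 being 0..3.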
  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

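  // Decodes the instruction's addressing mode and materializes the matching
  // x86 memory operand from the inputs starting at *offset.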
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_M1:
      case kMode_M2:
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MI: {
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(ctant.ToInt32(), ctant.rmode());
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }

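  // Returns the memory operand one word (4 bytes) past MemoryOperand(), i.e.
  // the high half of a 64-bit memory access.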
  Operand NextMemoryOperand(size_t offset = 0) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    Register base = InputRegister(NextOffset(&offset));
    const int32_t disp = 4;
    if (mode == kMode_MR1) {
      Register index = InputRegister(NextOffset(&offset));
      ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
      return Operand(base, index, scale, disp);
    } else if (mode == kMode_MRI) {
      Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
      return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
    } else {
      UNREACHABLE();
    }
  }

  void MoveInstructionOperandToRegister(Register destination,
                                        InstructionOperand* op) {
    if (op->IsImmediate() || op->IsConstant()) {
      gen_->tasm()->mov(destination, ToImmediate(op));
    } else if (op->IsRegister()) {
      gen_->tasm()->Move(destination, ToRegister(op));
    } else {
      gen_->tasm()->mov(destination, ToOperand(op));
    }
  }
};

namespace {

bool HasAddressingMode(Instruction* instr) {
  return instr->addressing_mode() != kMode_None;
}

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

bool HasRegisterInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsRegister();
}

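// Out-of-line code that loads a quiet NaN into {result} by computing
// 0.0f / 0.0f; used as the slow path of the Float32 min/max sequences below.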
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        isolate_(gen->isolate()),
        zone_(gen->zone()) {
  }

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    __ Movsd(MemOperand(esp, 0), input_);
#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
#else
    // For balance.
    if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (tasm()->options().inline_offheap_trampolines) {
      __ CallBuiltin(Builtin::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        zone_(gen->zone()) {
    DCHECK(!AreAliased(object, scratch0, scratch1));
    DCHECK(!AreAliased(value, scratch0, scratch1));
  }

  void Generate() final {
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ||
                FLAG_use_full_record_write_builtin
            ? RememberedSetAction::kEmit
            : RememberedSetAction::kOmit;
    SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
#if V8_ENABLE_WEBASSEMBLY
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode,
                                          StubCallMode::kCallWasmRuntimeStub);
#endif  // V8_ENABLE_WEBASSEMBLY
    } else {
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Zone* zone_;
};

}  // namespace

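// Emits a compare or test whose left operand may be in memory and whose right
// operand may be a register or an immediate, depending on the instruction.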
#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (0)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    /* Pass two doubles as arguments on the stack. */                    \
    __ PrepareCallCFunction(4, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ AllocateStackSpace(kDoubleSize);                                  \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                     \
  do {                                                                  \
    /* Pass one double as argument on the stack. */                    \
    __ PrepareCallCFunction(2, eax);                                    \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));  \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* Return value is in st(0) on ia32. */                             \
    /* Store it into the result register. */                            \
    __ AllocateStackSpace(kDoubleSize);                                 \
    __ fstp_d(Operand(esp, 0));                                         \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                \
    __ add(esp, Immediate(kDoubleSize));                                \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                             \
  do {                                                        \
    if (HasAddressingMode(instr)) {                           \
      size_t index = 1;                                       \
      Operand right = i.MemoryOperand(&index);                \
      __ asm_instr(i.InputRegister(0), right);                \
    } else {                                                  \
      if (HasImmediateInput(instr, 1)) {                      \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
      } else {                                                \
        __ asm_instr(i.InputRegister(0), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (0)

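// Implements an atomic read-modify-write as a compare-exchange retry loop:
// load the old value into eax, apply {bin_inst} on a temp copy, then lock
// cmpxchg and retry if another thread changed the memory location.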
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(eax, i.MemoryOperand(1));                       \
    __ Move(i.TempRegister(0), eax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

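// 64-bit variant of the retry loop above, built on cmpxchg8b: the expected
// value lives in edx:eax and the replacement pair is assembled in ebx and
// i.InputRegister(1).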
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)                \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov(eax, i.MemoryOperand(2));                            \
    __ mov(edx, i.NextMemoryOperand(2));                        \
    __ push(ebx);                                               \
    frame_access_state()->IncreaseSPDelta(1);                   \
    i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
    __ push(i.InputRegister(1));                                \
    __ instr1(ebx, eax);                                        \
    __ instr2(i.InputRegister(1), edx);                         \
    __ lock();                                                  \
    __ cmpxchg8b(i.MemoryOperand(2));                           \
    __ pop(i.InputRegister(1));                                 \
    __ pop(ebx);                                                \
    frame_access_state()->IncreaseSPDelta(-1);                  \
    __ j(not_equal, &binop);                                    \
  } while (false);

#define ASSEMBLE_MOVX(mov_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (0)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
  do {                                                               \
    XMMRegister src0 = i.InputSimd128Register(0);                    \
    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    if (CpuFeatures::IsSupported(AVX)) {                             \
      CpuFeatureScope avx_scope(tasm(), AVX);                        \
      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
    } else {                                                         \
      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
      __ opcode(i.OutputSimd128Register(), src1);                    \
    }                                                                \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  if (CpuFeatures::IsSupported(AVX)) {                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                 i.InputOperand(1), imm);                              \
  } else {                                                             \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
  }

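// Sets the output register to 1 iff no lane of the input is zero: compare the
// input against zero lane-wise, then ptest the result and cmov accordingly.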
#define ASSEMBLE_SIMD_ALL_TRUE(opcode)               \
  do {                                               \
    Register dst = i.OutputRegister();               \
    Operand src = i.InputOperand(0);                 \
    Register tmp = i.TempRegister(0);                \
    XMMRegister tmp_simd = i.TempSimd128Register(1); \
    __ mov(tmp, Immediate(1));                       \
    __ xor_(dst, dst);                               \
    __ Pxor(tmp_simd, tmp_simd);                     \
    __ opcode(tmp_simd, src);                        \
    __ Ptest(tmp_simd, tmp_simd);                    \
    __ cmov(zero, dst, tmp);                         \
  } while (false)

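// Emits a per-lane shift. Register shift counts are first masked to the
// lane's bit width (wasm shift semantics) and moved into an XMM temp.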
#define ASSEMBLE_SIMD_SHIFT(opcode, width)             \
  do {                                                 \
    XMMRegister dst = i.OutputSimd128Register();       \
    DCHECK_EQ(dst, i.InputSimd128Register(0));         \
    if (HasImmediateInput(instr, 1)) {                 \
      __ opcode(dst, dst, byte{i.InputInt##width(1)}); \
    } else {                                           \
      XMMRegister tmp = i.TempSimd128Register(0);      \
      Register tmp_shift = i.TempRegister(1);          \
      constexpr int mask = (1 << width) - 1;           \
      __ mov(tmp_shift, i.InputRegister(1));           \
      __ and_(tmp_shift, Immediate(mask));             \
      __ Movd(tmp, tmp_shift);                         \
      __ opcode(dst, dst, tmp);                        \
    }                                                  \
  } while (false)

#define ASSEMBLE_SIMD_PINSR(OPCODE, CPU_FEATURE)             \
  do {                                                       \
    XMMRegister dst = i.OutputSimd128Register();             \
    XMMRegister src = i.InputSimd128Register(0);             \
    int8_t laneidx = i.InputInt8(1);                         \
    if (HasAddressingMode(instr)) {                          \
      if (CpuFeatures::IsSupported(AVX)) {                   \
        CpuFeatureScope avx_scope(tasm(), AVX);              \
        __ v##OPCODE(dst, src, i.MemoryOperand(2), laneidx); \
      } else {                                               \
        DCHECK_EQ(dst, src);                                 \
        CpuFeatureScope sse_scope(tasm(), CPU_FEATURE);      \
        __ OPCODE(dst, i.MemoryOperand(2), laneidx);         \
      }                                                      \
    } else {                                                 \
      if (CpuFeatures::IsSupported(AVX)) {                   \
        CpuFeatureScope avx_scope(tasm(), AVX);              \
        __ v##OPCODE(dst, src, i.InputOperand(2), laneidx);  \
      } else {                                               \
        DCHECK_EQ(dst, src);                                 \
        CpuFeatureScope sse_scope(tasm(), CPU_FEATURE);      \
        __ OPCODE(dst, i.InputOperand(2), laneidx);          \
      }                                                      \
    }                                                        \
  } while (false)

void CodeGenerator::AssembleDeconstructFrame() {
  __ mov(esp, ebp);
  __ pop(ebp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ mov(ebp, MemOperand(ebp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

namespace {

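// Grows or shrinks the stack so that {new_slot_above_sp} becomes the first
// unused slot, keeping the frame access state's SP delta in sync.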
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

#ifdef DEBUG
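// Verifies that an atomic pair instruction routes its results through the
// eax/edx register pair, using temps for whichever halves are unused.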
bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
                                   const Instruction* instr) {
  if (instr->OutputCount() == 2) {
    return (converter->OutputRegister(0) == eax &&
            converter->OutputRegister(1) == edx);
  }
  if (instr->OutputCount() == 1) {
    return (converter->OutputRegister(0) == eax &&
            converter->TempRegister(0) == edx) ||
           (converter->OutputRegister(0) == edx &&
            converter->TempRegister(0) == eax);
  }
  DCHECK_EQ(instr->OutputCount(), 0);
  return (converter->TempRegister(0) == eax &&
          converter->TempRegister(1) == edx);
}
#endif

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_slot_offset) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_slot_offset)) {
    IA32OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_slot_offset) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_slot_offset);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ ComputeCodeStartAddress(eax);
  __ cmp(eax, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
  __ pop(eax);  // Restore eax.
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//   1. read from memory the word that contains that bit, which can be found
//      in the flags in the referenced {CodeDataContainer} object;
//   2. test kMarkedForDeoptimizationBit in those flags; and
//   3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ pop(eax);  // Restore eax.

  Label skip;
  __ j(zero, &skip, Label::kNear);
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET);
  __ bind(&skip);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  IA32OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      InstructionOperand* op = instr->InputAt(0);
      if (op->IsImmediate()) {
        Handle<Code> code = i.InputCode(0);
        __ Call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        __ call(reg);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!HasImmediateInput(instr, 0));
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ wasm_call(wasm_code, constant.rmode());
        } else {
          __ call(wasm_code, constant.rmode());
        }
      } else {
        __ call(i.InputRegister(0));
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallWasm: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ jmp(wasm_code, constant.rmode());
      } else {
        __ jmp(i.InputRegister(0));
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchTailCallCodeObject: {
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Jump(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        __ jmp(reg);
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      __ jmp(reg);
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
      __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(ecx);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_gp_parameters = ParamField::decode(instr->opcode());
      int const num_fp_parameters = FPParamField::decode(instr->opcode());
      __ PrepareCallCFunction(num_gp_parameters + num_fp_parameters,
                              i.TempRegister(0));
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      Label return_location;
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        Register scratch = eax;
        __ push(scratch);
        __ PushPC();
        int pc = __ pc_offset();
        __ pop(scratch);
        __ sub(scratch, Immediate(pc + Code::kHeaderSize - kHeapObjectTag));
        __ add(scratch, Immediate::CodeRelativeOffset(&return_location));
        __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset),
               scratch);
        __ pop(scratch);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      __ bind(&return_location);
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map());
      }
#endif  // V8_ENABLE_WEBASSEMBLY
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be more strict alignment requirement, causing old SP
      // to be saved on the stack. In those cases, we can not calculate the SP
      // delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
      break;
    case kArchAbortCSADcheck:
      DCHECK(i.InputRegister(0) == edx);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
        __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
                RelocInfo::CODE_TARGET);
      }
      __ int3();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchNop:
    case kArchThrowTerminator:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
      __ jmp(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), ebp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ mov(i.OutputRegister(), Operand(ebp, 0));
      } else {
        __ mov(i.OutputRegister(), ebp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.
      Register lhs_register = esp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = i.TempRegister(0);
        __ lea(lhs_register, Operand(esp, -1 * static_cast<int32_t>(offset)));
      }

      constexpr size_t kValueIndex = 0;
      if (HasAddressingMode(instr)) {
        __ cmp(lhs_register, i.MemoryOperand(kValueIndex));
      } else {
        __ cmp(lhs_register, i.InputRegister(kValueIndex));
      }
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
          this, result, input, DetermineStubCallMode());
      __ cvttsd2si(result, Operand(input));
      __ cmp(result, 1);
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStoreWithWriteBarrier:  // Fall through.
    case kArchAtomicStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);

      if (FLAG_debug_code) {
        // Checking that |value| is not a cleared weakref: our write barrier
        // does not support that for now.
        __ cmp(value, Immediate(kClearedWeakHeapObjectLower32));
        __ Check(not_equal, AbortReason::kOperandIsCleared);
      }

      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
                                                   scratch0, scratch1, mode,
                                                   DetermineStubCallMode());
      if (arch_opcode == kArchStoreWithWriteBarrier) {
        __ mov(operand, value);
      } else {
        __ mov(scratch0, value);
        __ xchg(scratch0, operand);
      }
      if (mode > RecordWriteMode::kValueIsPointer) {
        __ JumpIfSmi(value, ool->exit());
      }
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? esp : ebp;
      __ lea(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kIA32Add:
      ASSEMBLE_BINOP(add);
      break;
    case kIA32And:
      ASSEMBLE_BINOP(and_);
      break;
    case kIA32Cmp:
      ASSEMBLE_COMPARE(cmp);
      break;
    case kIA32Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kIA32Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kIA32Test:
      ASSEMBLE_COMPARE(test);
      break;
    case kIA32Test16:
      ASSEMBLE_COMPARE(test_w);
      break;
    case kIA32Test8:
      ASSEMBLE_COMPARE(test_b);
      break;
    case kIA32Imul:
      if (HasImmediateInput(instr, 1)) {
        __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
      } else {
        __ imul(i.OutputRegister(), i.InputOperand(1));
      }
      break;
    case kIA32ImulHigh:
      __ imul(i.InputRegister(1));
      break;
    case kIA32UmulHigh:
      __ mul(i.InputRegister(1));
      break;
    case kIA32Idiv:
      __ cdq();
      __ idiv(i.InputOperand(1));
      break;
    case kIA32Udiv:
      __ Move(edx, Immediate(0));
      __ div(i.InputOperand(1));
      break;
    case kIA32Not:
      __ not_(i.OutputOperand());
      break;
    case kIA32Neg:
      __ neg(i.OutputOperand());
      break;
    case kIA32Or:
      ASSEMBLE_BINOP(or_);
      break;
    case kIA32Xor:
      ASSEMBLE_BINOP(xor_);
      break;
    case kIA32Sub:
      ASSEMBLE_BINOP(sub);
      break;
    case kIA32Shl:
      if (HasImmediateInput(instr, 1)) {
        __ shl(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shl_cl(i.OutputOperand());
      }
      break;
    case kIA32Shr:
      if (HasImmediateInput(instr, 1)) {
        __ shr(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shr_cl(i.OutputOperand());
      }
      break;
    case kIA32Sar:
      if (HasImmediateInput(instr, 1)) {
        __ sar(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ sar_cl(i.OutputOperand());
      }
      break;
    case kIA32AddPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if ((HasRegisterInput(instr, 1) &&
           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for adc. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ add(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ add(i.OutputRegister(0), i.InputRegister(2));
      }
      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
                                         instr->InputAt(1));
      __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
    case kIA32SubPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if ((HasRegisterInput(instr, 1) &&
           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for sbb. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ sub(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ sub(i.OutputRegister(0), i.InputRegister(2));
      }
      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
                                         instr->InputAt(1));
      __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
    case kIA32MulPair: {
      __ imul(i.OutputRegister(1), i.InputOperand(0));
      i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
      __ imul(i.TempRegister(0), i.InputOperand(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));
      __ mov(i.OutputRegister(0), i.InputOperand(0));
      // Multiplies the low words and stores them in eax and edx.
      __ mul(i.InputRegister(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));
      break;
    }
    case kIA32ShlPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32ShrPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32SarPair:
      if (HasImmediateInput(instr, 2)) {
        __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32Rol:
      if (HasImmediateInput(instr, 1)) {
        __ rol(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ rol_cl(i.OutputOperand());
      }
      break;
    case kIA32Ror:
      if (HasImmediateInput(instr, 1)) {
        __ ror(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ ror_cl(i.OutputOperand());
      }
      break;
    case kIA32Lzcnt:
      __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Tzcnt:
      __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Popcnt:
      __ Popcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Bswap:
      __ bswap(i.OutputRegister());
      break;
    case kIA32MFence:
      __ mfence();
      break;
    case kIA32LFence:
      __ lfence();
      break;
    case kIA32Float32Cmp:
      __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kIA32Float32Sqrt:
      __ Sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kIA32Float32Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32Float64Cmp:
      __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kIA32Float32Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kIA32Float64Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kIA32Float32Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(kScratchDoubleReg, i.InputOperand(1));
        __ Movmskps(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kIA32Float64Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movsd(kScratchDoubleReg, i.InputOperand(1));
        __ Movmskpd(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kIA32Float64Mod: {
      Register tmp = i.TempRegister(1);
      __ mov(tmp, esp);
      __ AllocateStackSpace(kDoubleSize);
      __ and_(esp, -8);  // Align to an 8-byte boundary.
1402 // Move values to st(0) and st(1).
1403 __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1404 __ fld_d(Operand(esp, 0));
1405 __ Movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1406 __ fld_d(Operand(esp, 0));
1407 // Loop while fprem isn't done.
1408 Label mod_loop;
1409 __ bind(&mod_loop);
1410 // This instruction traps on all kinds of inputs, but we are assuming the
1411 // floating point control word is set to ignore them all.
1412 __ fprem();
1413 // fnstsw_ax clobbers eax.
1414 DCHECK_EQ(eax, i.TempRegister(0));
1415 __ fnstsw_ax();
1416 __ sahf();
1417 __ j(parity_even, &mod_loop);
1418 // Move output to stack and clean up.
1419 __ fstp(1);
1420 __ fstp_d(Operand(esp, 0));
1421 __ Movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1422 __ mov(esp, tmp);
1423 break;
1424 }
1425 case kIA32Float64Sqrt:
1426 __ Sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1427 break;
1428 case kIA32Float64Round: {
1429 RoundingMode const mode =
1430 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1431 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1432 break;
1433 }
1434 case kIA32Float32ToFloat64:
1435 __ Cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1436 break;
1437 case kIA32Float64ToFloat32:
1438 __ Cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1439 break;
1440 case kIA32Float32ToInt32:
1441 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1442 break;
1443 case kIA32Float32ToUint32:
1444 __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
1445 i.TempSimd128Register(0));
1446 break;
1447 case kIA32Float64ToInt32:
1448 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1449 break;
1450 case kIA32Float64ToUint32:
1451 __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
1452 i.TempSimd128Register(0));
1453 break;
1454 case kSSEInt32ToFloat32:
1455 // Calling Cvtsi2ss (which does a xor) regresses some benchmarks.
1456 __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1457 break;
1458 case kIA32Uint32ToFloat32:
1459 __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1460 i.TempRegister(0));
1461 break;
1462 case kSSEInt32ToFloat64:
1463 // Calling Cvtsi2sd (which does a xor) regresses some benchmarks.
1464 __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1465 break;
1466 case kIA32Uint32ToFloat64:
1467 __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
1468 i.TempRegister(0));
1469 break;
1470 case kIA32Float64ExtractLowWord32:
1471 if (instr->InputAt(0)->IsFPStackSlot()) {
1472 __ mov(i.OutputRegister(), i.InputOperand(0));
1473 } else {
1474 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1475 }
1476 break;
1477 case kIA32Float64ExtractHighWord32:
1478 if (instr->InputAt(0)->IsFPStackSlot()) {
1479 __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1480 } else {
1481 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1482 }
1483 break;
1484 case kIA32Float64InsertLowWord32:
1485 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1486 break;
1487 case kIA32Float64InsertHighWord32:
1488 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1489 break;
1490 case kIA32Float64LoadLowWord32:
1491 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1492 break;
1493 case kFloat32Add: {
1494 __ Addss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1495 i.InputOperand(1));
1496 break;
1497 }
1498 case kFloat32Sub: {
1499 __ Subss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1500 i.InputOperand(1));
1501 break;
1502 }
1503 case kFloat32Mul: {
1504 __ Mulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1505 i.InputOperand(1));
1506 break;
1507 }
1508 case kFloat32Div: {
1509 __ Divss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1510 i.InputOperand(1));
1511 // Don't delete this mov. It may improve performance on some CPUs,
1512 // when there is a (v)mulss depending on the result.
1513 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1514 break;
1515 }
1516 case kFloat64Add: {
1517 __ Addsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1518 i.InputOperand(1));
1519 break;
1520 }
1521 case kFloat64Sub: {
1522 __ Subsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1523 i.InputOperand(1));
1524 break;
1525 }
1526 case kFloat64Mul: {
1527 __ Mulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1528 i.InputOperand(1));
1529 break;
1530 }
1531 case kFloat64Div: {
1532 __ Divsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1533 i.InputOperand(1));
1534 // Don't delete this mov. It may improve performance on some CPUs,
1535 // when there is a (v)mulsd depending on the result.
1536 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1537 break;
1538 }
1539 case kFloat32Abs: {
1540 __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1541 i.TempRegister(0));
1542 break;
1543 }
1544 case kFloat32Neg: {
1545 __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1546 i.TempRegister(0));
1547 break;
1548 }
1549 case kFloat64Abs: {
1550 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1551 i.TempRegister(0));
1552 break;
1553 }
1554 case kFloat64Neg: {
1555 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1556 i.TempRegister(0));
1557 break;
1558 }
1559 case kIA32Float64SilenceNaN:
      __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
      break;
    case kIA32Movsxbl:
      ASSEMBLE_MOVX(movsx_b);
      break;
    case kIA32Movzxbl:
      ASSEMBLE_MOVX(movzx_b);
      break;
    case kIA32Movb: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_b(operand, i.InputInt8(index));
      } else {
        __ mov_b(operand, i.InputRegister(index));
      }
      break;
    }
    case kIA32Movsxwl:
      ASSEMBLE_MOVX(movsx_w);
      break;
    case kIA32Movzxwl:
      ASSEMBLE_MOVX(movzx_w);
      break;
    case kIA32Movw: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_w(operand, i.InputInt16(index));
      } else {
        __ mov_w(operand, i.InputRegister(index));
      }
      break;
    }
    case kIA32Movl:
      if (instr->HasOutput()) {
        __ mov(i.OutputRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        if (HasImmediateInput(instr, index)) {
          __ mov(operand, i.InputImmediate(index));
        } else {
          __ mov(operand, i.InputRegister(index));
        }
      }
      break;
    case kIA32Movsd:
      if (instr->HasOutput()) {
        __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movsd(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movss:
      if (instr->HasOutput()) {
        __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movss(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movdqu:
      if (instr->HasOutput()) {
        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movdqu(operand, i.InputSimd128Register(index));
      }
      break;
    case kIA32BitcastFI:
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ mov(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kIA32BitcastIF:
      if (HasRegisterInput(instr, 0)) {
        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kIA32Lea: {
      AddressingMode mode = AddressingModeField::decode(instr->opcode());
      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
      // and addressing mode just happen to work out. The "addl"/"subl" forms
      // in these cases are faster based on measurements.
      if (mode == kMode_MI) {
        __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
      } else if (i.InputRegister(0) == i.OutputRegister()) {
        if (mode == kMode_MRI) {
          int32_t constant_summand = i.InputInt32(1);
          if (constant_summand > 0) {
            __ add(i.OutputRegister(), Immediate(constant_summand));
          } else if (constant_summand < 0) {
            __ sub(i.OutputRegister(),
                   Immediate(base::NegateWithWraparound(constant_summand)));
          }
        } else if (mode == kMode_MR1) {
          if (i.InputRegister(1) == i.OutputRegister()) {
            __ shl(i.OutputRegister(), 1);
          } else {
            __ add(i.OutputRegister(), i.InputRegister(1));
          }
        } else if (mode == kMode_M2) {
          __ shl(i.OutputRegister(), 1);
        } else if (mode == kMode_M4) {
          __ shl(i.OutputRegister(), 2);
        } else if (mode == kMode_M8) {
          __ shl(i.OutputRegister(), 3);
        } else {
          __ lea(i.OutputRegister(), i.MemoryOperand());
        }
      } else if (mode == kMode_MR1 &&
                 i.InputRegister(1) == i.OutputRegister()) {
        __ add(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ lea(i.OutputRegister(), i.MemoryOperand());
      }
      break;
    }
    case kIA32Push: {
      int stack_decrement = i.InputInt32(0);
      int slots = stack_decrement / kSystemPointerSize;
      // Whenever codegen uses push, we need to check if stack_decrement
      // contains any extra padding and adjust the stack before the push.
      if (HasImmediateInput(instr, 1)) {
        __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
        __ push(i.InputImmediate(1));
      } else if (HasAddressingMode(instr)) {
        // Only single slot pushes from memory are supported.
        __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
        size_t index = 1;
        Operand operand = i.MemoryOperand(&index);
        __ push(operand);
      } else {
        InstructionOperand* input = instr->InputAt(1);
        if (input->IsRegister()) {
          __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
          __ push(i.InputRegister(1));
        } else if (input->IsFloatRegister()) {
          DCHECK_GE(stack_decrement, kFloatSize);
          __ AllocateStackSpace(stack_decrement);
          __ Movss(Operand(esp, 0), i.InputDoubleRegister(1));
        } else if (input->IsDoubleRegister()) {
          DCHECK_GE(stack_decrement, kDoubleSize);
          __ AllocateStackSpace(stack_decrement);
          __ Movsd(Operand(esp, 0), i.InputDoubleRegister(1));
        } else if (input->IsSimd128Register()) {
          DCHECK_GE(stack_decrement, kSimd128Size);
          __ AllocateStackSpace(stack_decrement);
          // TODO(bbudge) Use Movaps when slots are aligned.
          __ Movups(Operand(esp, 0), i.InputSimd128Register(1));
        } else if (input->IsStackSlot() || input->IsFloatStackSlot()) {
          __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
          __ push(i.InputOperand(1));
        } else if (input->IsDoubleStackSlot()) {
          DCHECK_GE(stack_decrement, kDoubleSize);
          __ Movsd(kScratchDoubleReg, i.InputOperand(1));
          __ AllocateStackSpace(stack_decrement);
          __ Movsd(Operand(esp, 0), kScratchDoubleReg);
        } else {
          DCHECK(input->IsSimd128StackSlot());
          DCHECK_GE(stack_decrement, kSimd128Size);
          // TODO(bbudge) Use Movaps when slots are aligned.
          __ Movups(kScratchDoubleReg, i.InputOperand(1));
          __ AllocateStackSpace(stack_decrement);
          __ Movups(Operand(esp, 0), kScratchDoubleReg);
        }
      }
      frame_access_state()->IncreaseSPDelta(slots);
      break;
    }
    case kIA32Poke: {
      int slot = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
      } else {
        __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
      }
      break;
    }
    case kIA32Peek: {
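      // A "peek" reads a value back out of the stack frame: the reverse slot
      // index (counted from the end of the frame) is translated into an
      // ebp-relative offset, and the load width follows the output
      // representation.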
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ Movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
        } else if (op->representation() == MachineRepresentation::kFloat32) {
          __ Movss(i.OutputFloatRegister(), Operand(ebp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
          __ Movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
        }
      } else {
        __ mov(i.OutputRegister(), Operand(ebp, offset));
      }
      break;
    }
    case kIA32F64x2Splat: {
      __ Movddup(i.OutputSimd128Register(), i.InputDoubleRegister(0));
      break;
    }
    case kIA32F64x2ExtractLane: {
      __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
                          i.InputUint8(1));
      break;
    }
    case kIA32F64x2ReplaceLane: {
      __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputDoubleRegister(2), i.InputInt8(1));
      break;
    }
    case kIA32F64x2Sqrt: {
      __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F64x2Add: {
      __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Sub: {
      __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Mul: {
      __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Div: {
      __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F64x2Min: {
      __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Max: {
      __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Eq: {
      __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Ne: {
      __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32F64x2Lt: {
      __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Le: {
      __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F64x2Qfma: {
      __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32F64x2Qfms: {
      __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32Minpd: {
      __ Minpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32Maxpd: {
      __ Maxpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32F64x2Round: {
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32F64x2PromoteLowF32x4: {
      if (HasAddressingMode(instr)) {
        __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      }
      break;
    }
    case kIA32F32x4DemoteF64x2Zero: {
      __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4TruncSatF64x2SZero: {
      __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
                                 i.InputSimd128Register(0), kScratchDoubleReg,
                                 i.TempRegister(0));
      break;
    }
    case kIA32I32x4TruncSatF64x2UZero: {
      __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
                                 i.InputSimd128Register(0), kScratchDoubleReg,
                                 i.TempRegister(0));
      break;
    }
    case kIA32F64x2ConvertLowI32x4S: {
      __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32F64x2ConvertLowI32x4U: {
      __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
                               i.InputSimd128Register(0), i.TempRegister(0));
      break;
    }
    case kIA32I64x2ExtMulLowI32x4S: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/true);
      break;
    }
    case kIA32I64x2ExtMulHighI32x4S: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/true);
      break;
    }
    case kIA32I64x2ExtMulLowI32x4U: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/false);
      break;
    }
    case kIA32I64x2ExtMulHighI32x4U: {
      __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/false);
      break;
    }
    case kIA32I32x4ExtMulLowI16x8S: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/true);
      break;
    }
    case kIA32I32x4ExtMulHighI16x8S: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/true);
      break;
    }
    case kIA32I32x4ExtMulLowI16x8U: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/true, /*is_signed=*/false);
      break;
    }
    case kIA32I32x4ExtMulHighI16x8U: {
      __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputSimd128Register(1), kScratchDoubleReg,
                     /*low=*/false, /*is_signed=*/false);
      break;
    }
    case kIA32I16x8ExtMulLowI8x16S: {
      __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
                        i.InputSimd128Register(1), kScratchDoubleReg,
                        /*is_signed=*/true);
      break;
    }
    case kIA32I16x8ExtMulHighI8x16S: {
      __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ExtMulLowI8x16U: {
      __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
                        i.InputSimd128Register(1), kScratchDoubleReg,
                        /*is_signed=*/false);
      break;
    }
    case kIA32I16x8ExtMulHighI8x16U: {
      __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2SplatI32Pair: {
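      // Build the 64-bit splat from two 32-bit halves: insert them into
      // dwords 0 and 1, then pshufd with 0x44 (selects dwords 0,1,0,1)
      // copies the low qword into the high qword.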
      XMMRegister dst = i.OutputSimd128Register();
      __ Pinsrd(dst, i.InputRegister(0), 0);
      __ Pinsrd(dst, i.InputOperand(1), 1);
      __ Pshufd(dst, dst, uint8_t{0x44});
      break;
    }
    case kIA32I64x2ReplaceLaneI32Pair: {
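      // A 64-bit lane is replaced as two 32-bit inserts, at dword indices
      // 2 * lane and 2 * lane + 1.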
      int8_t lane = i.InputInt8(1);
      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), lane * 2);
      __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(3), lane * 2 + 1);
      break;
    }
    case kIA32I64x2Abs: {
      __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  kScratchDoubleReg);
      break;
    }
    case kIA32I64x2Neg: {
      __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  kScratchDoubleReg);
      break;
    }
    case kIA32I64x2Shl: {
      ASSEMBLE_SIMD_SHIFT(Psllq, 6);
      break;
    }
    case kIA32I64x2ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      if (HasImmediateInput(instr, 1)) {
        __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
      } else {
        __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
                     i.TempSimd128Register(0), i.TempRegister(1));
      }
      break;
    }
    case kIA32I64x2Add: {
      __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I64x2Sub: {
      __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I64x2Mul: {
      __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), i.TempSimd128Register(0),
                  i.TempSimd128Register(1));
      break;
    }
    case kIA32I64x2ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
      break;
    }
    case kIA32I64x2BitMask: {
      __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2Eq: {
      __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I64x2Ne: {
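      // There is no 64-bit "not equal" compare: compute equality, then
      // invert the mask by XORing with all-ones (pcmpeqq of a register with
      // itself).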
      __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2GtS: {
      __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2GeS: {
      __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32I64x2SConvertI32x4Low: {
      __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2SConvertI32x4High: {
      __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2UConvertI32x4Low: {
      __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I64x2UConvertI32x4High: {
      __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I32x4ExtAddPairwiseI16x8S: {
      __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   i.TempRegister(0));
      break;
    }
    case kIA32I32x4ExtAddPairwiseI16x8U: {
      __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ExtAddPairwiseI8x16S: {
      __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0), kScratchDoubleReg,
                                   i.TempRegister(0));
      break;
    }
    case kIA32I16x8ExtAddPairwiseI8x16U: {
      __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
                                   i.InputSimd128Register(0),
                                   i.TempRegister(0));
      break;
    }
    case kIA32I16x8Q15MulRSatS: {
      __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Splat: {
      __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
      break;
    }
    case kIA32F32x4ExtractLane: {
      __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
                          i.InputUint8(1));
      break;
    }
    case kIA32Insertps: {
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     i.InputOperand(2), i.InputInt8(1) << 4);
      } else {
        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
        CpuFeatureScope sse_scope(tasm(), SSE4_1);
        __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
                    i.InputInt8(1) << 4);
      }
      break;
    }
    case kIA32F32x4SConvertI32x4: {
      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4UConvertI32x4: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
      __ Pblendw(kScratchDoubleReg, src, uint8_t{0x55});  // get lo 16 bits
      __ Psubd(dst, src, kScratchDoubleReg);              // get hi 16 bits
      __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ Psrld(dst, dst, byte{1});  // divide by 2 to get in unsigned range
      __ Cvtdq2ps(dst, dst);        // convert hi exactly
      __ Addps(dst, dst, dst);      // double hi, exactly
      __ Addps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kIA32F32x4Sqrt: {
      __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32F32x4RecipApprox: {
      __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipSqrtApprox: {
      __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4Add: {
      __ Addps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Sub: {
      __ Subps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Mul: {
      __ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Div: {
      __ Divps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32F32x4Min: {
      __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Max: {
      __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Eq: {
      __ Cmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Ne: {
      __ Cmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32F32x4Lt: {
      __ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Le: {
      __ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32F32x4Qfma: {
      __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32F32x4Qfms: {
      __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputSimd128Register(1), i.InputSimd128Register(2),
                   kScratchDoubleReg);
      break;
    }
    case kIA32Minps: {
      __ Minps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32Maxps: {
      __ Maxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
      break;
    }
    case kIA32F32x4Round: {
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kIA32I32x4Splat: {
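      // movd places the scalar in dword 0; pshufd with selector 0 then
      // broadcasts dword 0 to all four lanes.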
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshufd(dst, dst, uint8_t{0x0});
      break;
    }
    case kIA32I32x4ExtractLane: {
      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
      break;
    }
    case kIA32I32x4SConvertF32x4: {
      __ I32x4SConvertF32x4(i.OutputSimd128Register(),
                            i.InputSimd128Register(0), kScratchDoubleReg,
                            i.TempRegister(0));
      break;
    }
    case kIA32I32x4SConvertI16x8Low: {
      __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4SConvertI16x8High: {
      __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4Neg: {
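      // If dst aliases src, negate in place: psignd with an all-ones mask
      // (-1 per lane) negates every lane, matching wraparound negation.
      // Otherwise compute 0 - src directly.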
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignd(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubd(dst, src);
      }
      break;
    }
    case kIA32I32x4Shl: {
      ASSEMBLE_SIMD_SHIFT(Pslld, 5);
      break;
    }
    case kIA32I32x4ShrS: {
      ASSEMBLE_SIMD_SHIFT(Psrad, 5);
      break;
    }
    case kIA32I32x4Add: {
      __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I32x4Sub: {
      __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I32x4Mul: {
      __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4MinS: {
      __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4MaxS: {
      __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4Eq: {
      __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I32x4Ne: {
      __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ Pxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
              kScratchDoubleReg);
      break;
    }
    case kIA32I32x4GtS: {
      __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I32x4GeS: {
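      // There is no signed >= compare: use min(a, b) == b, which holds
      // exactly when a >= b.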
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      XMMRegister src2 = i.InputSimd128Register(1);
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        __ vpminsd(kScratchDoubleReg, src1, src2);
        __ vpcmpeqd(dst, kScratchDoubleReg, src2);
      } else {
        DCHECK_EQ(dst, src1);
        CpuFeatureScope sse_scope(tasm(), SSE4_1);
        __ pminsd(dst, src2);
        __ pcmpeqd(dst, src2);
      }
      break;
    }
    case kSSEI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.TempSimd128Register(0);
      // NaN -> 0, negative -> 0
      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ maxps(dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ movaps(tmp, dst);
      __ subps(tmp, kScratchDoubleReg);
      __ cmpleps(kScratchDoubleReg, tmp);
      __ cvttps2dq(tmp, tmp);
      __ xorps(tmp, kScratchDoubleReg);
      __ xorps(kScratchDoubleReg, kScratchDoubleReg);
      __ pmaxsd(tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ cvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ paddd(dst, tmp);
      break;
    }
    case kAVXI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.TempSimd128Register(0);
      // NaN -> 0, negative -> 0
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vmaxps(dst, dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ vsubps(tmp, dst, kScratchDoubleReg);
      __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
      __ vcvttps2dq(tmp, tmp);
      __ vpxor(tmp, tmp, kScratchDoubleReg);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ vcvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ vpaddd(dst, dst, tmp);
      break;
    }
    case kIA32I32x4UConvertI16x8Low: {
      __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4UConvertI16x8High: {
      __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I32x4ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrld, 5);
      break;
    }
    case kIA32I32x4MinU: {
      __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I32x4MaxU: {
      __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI32x4GtU: {
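      // There is no unsigned compare instruction: max(a, b) == b holds
      // exactly when a <= b, so compute that and invert the mask to get
      // a > b.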
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxud(dst, src);
      __ pcmpeqd(dst, src);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI32x4GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminud(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I32x4Abs: {
      __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4BitMask: {
      __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I32x4DotI16x8S: {
      __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8Splat: {
      if (instr->InputAt(0)->IsRegister()) {
        __ I16x8Splat(i.OutputSimd128Register(), i.InputRegister(0));
      } else {
        __ I16x8Splat(i.OutputSimd128Register(), i.InputOperand(0));
      }
      break;
    }
    case kIA32I16x8ExtractLaneS: {
      Register dst = i.OutputRegister();
      __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
      __ movsx_w(dst, dst);
      break;
    }
    case kIA32I16x8SConvertI8x16Low: {
      __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8SConvertI8x16High: {
      __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0));
      break;
    }
    case kIA32I16x8Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignw(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubw(dst, src);
      }
      break;
    }
    case kIA32I16x8Shl: {
      ASSEMBLE_SIMD_SHIFT(Psllw, 4);
      break;
    }
    case kIA32I16x8ShrS: {
      ASSEMBLE_SIMD_SHIFT(Psraw, 4);
      break;
    }
    case kIA32I16x8SConvertI32x4: {
      __ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I16x8Add: {
      __ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8AddSatS: {
      __ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Sub: {
      __ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8SubSatS: {
      __ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Mul: {
      __ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MinS: {
      __ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MaxS: {
      __ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8Eq: {
      __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI16x8Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kIA32I16x8GtS: {
      __ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8UConvertI8x16Low: {
      __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8UConvertI8x16High: {
      __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
                                i.InputSimd128Register(0), kScratchDoubleReg);
      break;
    }
    case kIA32I16x8ShrU: {
      ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
      break;
    }
    case kIA32I16x8UConvertI32x4: {
      __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kIA32I16x8AddSatU: {
      __ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8SubSatU: {
      __ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I16x8MinU: {
      __ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I16x8MaxU: {
      __ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI16x8GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxuw(dst, src);
      __ pcmpeqw(dst, src);
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI16x8GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(dst, kScratchDoubleReg, src2);
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI16x8GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminuw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8RoundingAverageU: {
      __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I16x8Abs: {
      __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I16x8BitMask: {
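      // pmovmskb gathers byte sign bits. packsswb narrows the eight 16-bit
      // lanes of the input (saturating, sign-preserving) into the high 8
      // bytes of tmp, so the wanted bits land in bits 8..15 of the mask;
      // shift them down by 8.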
      Register dst = i.OutputRegister();
      XMMRegister tmp = i.TempSimd128Register(0);
      __ Packsswb(tmp, i.InputSimd128Register(0));
      __ Pmovmskb(dst, tmp);
      __ shr(dst, 8);
      break;
    }
    case kIA32I8x16Splat: {
      if (instr->InputAt(0)->IsRegister()) {
        __ I8x16Splat(i.OutputSimd128Register(), i.InputRegister(0),
                      kScratchDoubleReg);
      } else {
        __ I8x16Splat(i.OutputSimd128Register(), i.InputOperand(0),
                      kScratchDoubleReg);
      }
      break;
    }
    case kIA32I8x16ExtractLaneS: {
      Register dst = i.OutputRegister();
      __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
      __ movsx_b(dst, dst);
      break;
    }
    case kIA32Pinsrb: {
      ASSEMBLE_SIMD_PINSR(pinsrb, SSE4_1);
      break;
    }
    case kIA32Pinsrw: {
      ASSEMBLE_SIMD_PINSR(pinsrw, SSE4_1);
      break;
    }
    case kIA32Pinsrd: {
      ASSEMBLE_SIMD_PINSR(pinsrd, SSE4_1);
      break;
    }
    case kIA32Movlps: {
      if (instr->HasOutput()) {
        __ Movlps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.MemoryOperand(2));
      } else {
        size_t index = 0;
        Operand dst = i.MemoryOperand(&index);
        __ Movlps(dst, i.InputSimd128Register(index));
      }
      break;
    }
    case kIA32Movhps: {
      if (instr->HasOutput()) {
        __ Movhps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.MemoryOperand(2));
      } else {
        size_t index = 0;
        Operand dst = i.MemoryOperand(&index);
        __ Movhps(dst, i.InputSimd128Register(index));
      }
      break;
    }
    case kIA32Pextrb: {
      if (HasAddressingMode(instr)) {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Pextrb(operand, i.InputSimd128Register(index),
                  i.InputUint8(index + 1));
      } else {
        Register dst = i.OutputRegister();
        __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
      }
      break;
    }
    case kIA32Pextrw: {
      if (HasAddressingMode(instr)) {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Pextrw(operand, i.InputSimd128Register(index),
                  i.InputUint8(index + 1));
      } else {
        Register dst = i.OutputRegister();
        __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
      }
      break;
    }
    case kIA32S128Store32Lane: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      uint8_t laneidx = i.InputUint8(index + 1);
      __ S128Store32Lane(operand, i.InputSimd128Register(index), laneidx);
      break;
    }
    case kIA32I8x16SConvertI16x8: {
      __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I8x16Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignb(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubb(dst, src);
      }
      break;
    }
    case kIA32I8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
      Register tmp = i.TempRegister(0);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
      } else {
        XMMRegister tmp_simd = i.TempSimd128Register(1);
        __ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
                    tmp_simd);
      }
      break;
    }
    case kIA32I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
      } else {
        __ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
                     kScratchDoubleReg, i.TempSimd128Register(1));
      }
      break;
    }
    case kIA32I8x16Add: {
      __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16AddSatS: {
      __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16Sub: {
      __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16SubSatS: {
      __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MinS: {
      __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MaxS: {
      __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16Eq: {
      __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI8x16Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kIA32I8x16GtS: {
      __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsb(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsb(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I8x16UConvertI16x8: {
      __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1));
      break;
    }
    case kIA32I8x16AddSatU: {
      __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I8x16SubSatU: {
      __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kIA32I8x16ShrU: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
      Register tmp = i.TempRegister(0);

      if (HasImmediateInput(instr, 1)) {
        __ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
      } else {
        __ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
                     i.TempSimd128Register(1));
      }

      break;
    }
    case kIA32I8x16MinU: {
      __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kIA32I8x16MaxU: {
      __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI8x16GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxub(dst, src);
      __ pcmpeqb(dst, src);
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ xorps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI8x16GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(dst, kScratchDoubleReg, src2);
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI8x16GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminub(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I8x16RoundingAverageU: {
      __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kIA32I8x16Abs: {
      __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I8x16BitMask: {
      __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
      break;
    }
    case kIA32I8x16Popcnt: {
      __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
                     kScratchDoubleReg, i.TempSimd128Register(0),
                     i.TempRegister(1));
      break;
    }
    case kIA32S128Const: {
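      // Materialize the 128-bit constant piecewise: the low qword via Move,
      // then the two high dwords through a scratch GPR and pinsrd.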
      XMMRegister dst = i.OutputSimd128Register();
      Register tmp = i.TempRegister(0);
      uint64_t low_qword = make_uint64(i.InputUint32(1), i.InputUint32(0));
      __ Move(dst, low_qword);
      __ Move(tmp, Immediate(i.InputUint32(2)));
      __ Pinsrd(dst, tmp, 2);
      __ Move(tmp, Immediate(i.InputUint32(3)));
      __ Pinsrd(dst, tmp, 3);
      break;
    }
    case kIA32S128Zero: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Pxor(dst, dst);
      break;
    }
    case kIA32S128AllOnes: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Pcmpeqd(dst, dst);
      break;
    }
    case kIA32S128Not: {
      __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 kScratchDoubleReg);
      break;
    }
    case kIA32S128And: {
      __ Pand(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputOperand(1));
      break;
    }
    case kIA32S128Or: {
      __ Por(i.OutputSimd128Register(), i.InputSimd128Register(0),
             i.InputOperand(1));
      break;
    }
    case kIA32S128Xor: {
      __ Pxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputOperand(1));
      break;
    }
    case kIA32S128Select: {
      __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
                    i.InputSimd128Register(1), i.InputSimd128Register(2),
                    kScratchDoubleReg);
      break;
    }
    case kIA32S128AndNot: {
      // The inputs have been inverted by the instruction selector, so we can
      // call andnps here without any modifications.
      __ Andnps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputSimd128Register(1));
      break;
    }
    case kIA32I8x16Swizzle: {
      __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
                      i.InputSimd128Register(1), kScratchDoubleReg,
                      i.TempRegister(0), MiscField::decode(instr->opcode()));
      break;
    }
    case kIA32I8x16Shuffle: {
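      // Implemented with pshufb: the 16 shuffle indices are pushed onto a
      // 16-byte aligned stack buffer as four 32-bit immediates. For the
      // two-operand form, each source is shuffled with a control mask in
      // which lanes belonging to the other source are replaced by 0x80
      // (pshufb writes zero for indices with the top bit set), and the two
      // partial results are OR'd together.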
      XMMRegister dst = i.OutputSimd128Register();
      Operand src0 = i.InputOperand(0);
      Register tmp = i.TempRegister(0);
      // Prepare a 16-byte aligned stack buffer for the shuffle control mask.
      __ mov(tmp, esp);
      __ and_(esp, -16);
      if (instr->InputCount() == 5) {  // only one input operand
        DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
        for (int j = 4; j > 0; j--) {
          uint32_t mask = i.InputUint32(j);
          __ push(Immediate(mask));
        }
        __ Pshufb(dst, Operand(esp, 0));
      } else {  // two input operands
        DCHECK_EQ(6, instr->InputCount());
        __ Movups(kScratchDoubleReg, src0);
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          uint32_t mask = 0;
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask |= (lane < kSimd128Size ? lane : 0x80) << k;
          }
          __ push(Immediate(mask));
        }
        __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
        Operand src1 = i.InputOperand(1);
        if (!src1.is_reg(dst)) __ Movups(dst, src1);
        for (int j = 5; j > 1; j--) {
          uint32_t lanes = i.InputUint32(j);
          uint32_t mask = 0;
          for (int k = 0; k < 32; k += 8) {
            uint8_t lane = lanes >> k;
            mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
          }
          __ push(Immediate(mask));
        }
        __ Pshufb(dst, Operand(esp, 0));
        __ por(dst, kScratchDoubleReg);
      }
      __ mov(esp, tmp);
      break;
    }
    case kIA32S128Load8Splat: {
      __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
                        kScratchDoubleReg);
      break;
    }
    case kIA32S128Load16Splat: {
      __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
                         kScratchDoubleReg);
      break;
    }
    case kIA32S128Load32Splat: {
      __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load64Splat: {
      __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load8x8S: {
      __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load8x8U: {
      __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load16x4S: {
      __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load16x4U: {
      __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load32x2S: {
      __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S128Load32x2U: {
      __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
      break;
    }
    case kIA32S32x4Rotate: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      uint8_t mask = i.InputUint8(1);
      if (dst == src) {
        // 1-byte shorter encoding than pshufd.
        __ Shufps(dst, src, src, mask);
      } else {
        __ Pshufd(dst, src, mask);
      }
      break;
    }
    case kIA32S32x4Swizzle: {
      DCHECK_EQ(2, instr->InputCount());
      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputUint8(1));
      break;
    }
    case kIA32S32x4Shuffle: {
      DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
      uint8_t shuffle = i.InputUint8(2);
      DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
      __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
      __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
      __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
      break;
    }
    case kIA32S16x8Blend:
      ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
      break;
    case kIA32S16x8HalfShuffle1: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(1));
      __ Pshufhw(dst, dst, i.InputUint8(2));
      break;
    }
    case kIA32S16x8HalfShuffle2: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputUint8(2));
      __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
      __ Pshuflw(dst, i.InputOperand(0), i.InputUint8(2));
      __ Pshufhw(dst, dst, i.InputUint8(3));
      __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
      break;
    }
    case kIA32S8x16Alignr:
      ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
      break;
    case kIA32S16x8Dup: {
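      // half_dup replicates the 2-bit lane index into all four fields of the
      // pshuflw/pshufhw immediate (e.g. lane4 == 2 gives 0xAA); the punpck
      // afterwards copies the duplicated qword across the whole register.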
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      uint8_t lane = i.InputUint8(1) & 0x7;
      uint8_t lane4 = lane & 0x3;
      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        __ Pshuflw(dst, src, half_dup);
        __ Punpcklqdq(dst, dst);
      } else {
        __ Pshufhw(dst, src, half_dup);
        __ Punpckhqdq(dst, dst);
      }
      break;
    }
    case kIA32S8x16Dup: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      uint8_t lane = i.InputUint8(1) & 0xf;
      if (CpuFeatures::IsSupported(AVX)) {
        CpuFeatureScope avx_scope(tasm(), AVX);
        if (lane < 8) {
          __ vpunpcklbw(dst, src, src);
        } else {
          __ vpunpckhbw(dst, src, src);
        }
      } else {
        DCHECK_EQ(dst, src);
        if (lane < 8) {
          __ punpcklbw(dst, dst);
        } else {
          __ punpckhbw(dst, dst);
        }
      }
      lane &= 0x7;
      uint8_t lane4 = lane & 0x3;
      uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
      if (lane < 4) {
        __ Pshuflw(dst, dst, half_dup);
        __ Punpcklqdq(dst, dst);
      } else {
        __ Pshufhw(dst, dst, half_dup);
        __ Punpckhqdq(dst, dst);
      }
      break;
    }
    case kIA32S64x2UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
      break;
    case kIA32S32x4UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
      break;
    case kIA32S16x8UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
      break;
    case kIA32S8x16UnpackHigh:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
      break;
    case kIA32S64x2UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
      break;
    case kIA32S32x4UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
      break;
    case kIA32S16x8UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
      break;
    case kIA32S8x16UnpackLow:
      ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
      break;
    case kSSES16x8UnzipHigh: {
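      // Unzip the odd (high) 16-bit halves: shifting each dword right by 16
      // moves them into the low halves with zero upper bits, so the unsigned
      // saturation in packusdw cannot trigger when the results are
      // concatenated.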
3239 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3240 XMMRegister dst = i.OutputSimd128Register();
3241 XMMRegister src2 = dst;
3242 DCHECK_EQ(dst, i.InputSimd128Register(0));
3243 if (instr->InputCount() == 2) {
3244 __ movups(kScratchDoubleReg, i.InputOperand(1));
3245 __ psrld(kScratchDoubleReg, 16);
3246 src2 = kScratchDoubleReg;
3247 }
3248 __ psrld(dst, 16);
3249 __ packusdw(dst, src2);
3250 break;
3251 }
3252 case kAVXS16x8UnzipHigh: {
3253 CpuFeatureScope avx_scope(tasm(), AVX);
3254 XMMRegister dst = i.OutputSimd128Register();
3255 XMMRegister src2 = dst;
3256 if (instr->InputCount() == 2) {
3257 __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3258 src2 = kScratchDoubleReg;
3259 }
3260 __ vpsrld(dst, i.InputSimd128Register(0), 16);
3261 __ vpackusdw(dst, dst, src2);
3262 break;
3263 }
3264 case kSSES16x8UnzipLow: {
3265 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3266 XMMRegister dst = i.OutputSimd128Register();
3267 XMMRegister src2 = dst;
3268 DCHECK_EQ(dst, i.InputSimd128Register(0));
3269 __ xorps(kScratchDoubleReg, kScratchDoubleReg);
3270 if (instr->InputCount() == 2) {
3271 __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3272 src2 = kScratchDoubleReg;
3273 }
3274 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3275 __ packusdw(dst, src2);
3276 break;
3277 }
3278 case kAVXS16x8UnzipLow: {
3279 CpuFeatureScope avx_scope(tasm(), AVX);
3280 XMMRegister dst = i.OutputSimd128Register();
3281 XMMRegister src2 = dst;
3282 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3283 if (instr->InputCount() == 2) {
3284 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3285 0x55);
3286 src2 = kScratchDoubleReg;
3287 }
3288 __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3289 __ vpackusdw(dst, dst, src2);
3290 break;
3291 }
3292 case kSSES8x16UnzipHigh: {
3293 XMMRegister dst = i.OutputSimd128Register();
3294 XMMRegister src2 = dst;
3295 DCHECK_EQ(dst, i.InputSimd128Register(0));
3296 if (instr->InputCount() == 2) {
3297 __ movups(kScratchDoubleReg, i.InputOperand(1));
3298 __ psrlw(kScratchDoubleReg, 8);
3299 src2 = kScratchDoubleReg;
3300 }
3301 __ psrlw(dst, 8);
3302 __ packuswb(dst, src2);
3303 break;
3304 }
3305 case kAVXS8x16UnzipHigh: {
3306 CpuFeatureScope avx_scope(tasm(), AVX);
3307 XMMRegister dst = i.OutputSimd128Register();
3308 XMMRegister src2 = dst;
3309 if (instr->InputCount() == 2) {
3310 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3311 src2 = kScratchDoubleReg;
3312 }
3313 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3314 __ vpackuswb(dst, dst, src2);
3315 break;
3316 }
3317 case kSSES8x16UnzipLow: {
3318 XMMRegister dst = i.OutputSimd128Register();
3319 XMMRegister src2 = dst;
3320 DCHECK_EQ(dst, i.InputSimd128Register(0));
3321 if (instr->InputCount() == 2) {
3322 __ movups(kScratchDoubleReg, i.InputOperand(1));
3323 __ psllw(kScratchDoubleReg, 8);
3324 __ psrlw(kScratchDoubleReg, 8);
3325 src2 = kScratchDoubleReg;
3326 }
3327 __ psllw(dst, 8);
3328 __ psrlw(dst, 8);
3329 __ packuswb(dst, src2);
3330 break;
3331 }
3332 case kAVXS8x16UnzipLow: {
3333 CpuFeatureScope avx_scope(tasm(), AVX);
3334 XMMRegister dst = i.OutputSimd128Register();
3335 XMMRegister src2 = dst;
3336 if (instr->InputCount() == 2) {
3337 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3338 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3339 src2 = kScratchDoubleReg;
3340 }
3341 __ vpsllw(dst, i.InputSimd128Register(0), 8);
3342 __ vpsrlw(dst, dst, 8);
3343 __ vpackuswb(dst, dst, src2);
3344 break;
3345 }
    case kSSES8x16TransposeLow: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ psllw(dst, 8);
      if (instr->InputCount() == 1) {
        __ movups(kScratchDoubleReg, dst);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        __ movups(kScratchDoubleReg, i.InputOperand(1));
        __ psllw(kScratchDoubleReg, 8);
      }
      __ psrlw(dst, 8);
      __ orps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXS8x16TransposeLow: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputCount() == 1) {
        __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
        __ vpsrlw(dst, kScratchDoubleReg, 8);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
        __ vpsllw(dst, i.InputSimd128Register(0), 8);
        __ vpsrlw(dst, dst, 8);
      }
      __ vpor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSES8x16TransposeHigh: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ psrlw(dst, 8);
      if (instr->InputCount() == 1) {
        __ movups(kScratchDoubleReg, dst);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        __ movups(kScratchDoubleReg, i.InputOperand(1));
        __ psrlw(kScratchDoubleReg, 8);
      }
      __ psllw(kScratchDoubleReg, 8);
      __ orps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXS8x16TransposeHigh: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      if (instr->InputCount() == 1) {
        __ vpsrlw(dst, i.InputSimd128Register(0), 8);
        __ vpsllw(kScratchDoubleReg, dst, 8);
      } else {
        DCHECK_EQ(2, instr->InputCount());
        __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
        __ vpsrlw(dst, i.InputSimd128Register(0), 8);
        __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
      }
      __ vpor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSES8x8Reverse:
    case kSSES8x4Reverse:
    case kSSES8x2Reverse: {
      DCHECK_EQ(1, instr->InputCount());
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      if (arch_opcode != kSSES8x2Reverse) {
        // First shuffle words into position.
        int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
        __ pshuflw(dst, dst, shuffle_mask);
        __ pshufhw(dst, dst, shuffle_mask);
      }
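      // Reverse the two bytes within each 16-bit lane.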
      __ movaps(kScratchDoubleReg, dst);
      __ psrlw(kScratchDoubleReg, 8);
      __ psllw(dst, 8);
      __ orps(dst, kScratchDoubleReg);
      break;
    }
    case kAVXS8x2Reverse:
    case kAVXS8x4Reverse:
    case kAVXS8x8Reverse: {
      DCHECK_EQ(1, instr->InputCount());
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = dst;
      if (arch_opcode != kAVXS8x2Reverse) {
        // First shuffle words into position.
        int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
        __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
        __ vpshufhw(dst, dst, shuffle_mask);
      } else {
        src = i.InputSimd128Register(0);
      }
      // Reverse each 16 bit lane.
      __ vpsrlw(kScratchDoubleReg, src, 8);
      __ vpsllw(dst, src, 8);
      __ vpor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kIA32S128AnyTrue: {
      Register dst = i.OutputRegister();
      XMMRegister src = i.InputSimd128Register(0);
      Register tmp = i.TempRegister(0);
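      // Materialize 1, then replace it with 0 (held in tmp) when ptest
      // reports an all-zero vector (ZF set).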
      __ xor_(tmp, tmp);
      __ mov(dst, Immediate(1));
      __ Ptest(src, src);
      __ cmov(zero, dst, tmp);
      break;
    }
    // Need to split up all the different lane structures because the
    // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
    // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
    // respectively.
    case kIA32I64x2AllTrue:
      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
      break;
    case kIA32I32x4AllTrue:
      ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
      break;
    case kIA32I16x8AllTrue:
      ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
      break;
    case kIA32I8x16AllTrue: {
      ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
      break;
    }
    case kIA32Pblendvb: {
      __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputSimd128Register(1), i.InputSimd128Register(2));
      break;
    }
    case kIA32I32x4TruncF64x2UZero: {
      __ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
                              i.InputSimd128Register(0), i.TempRegister(0),
                              kScratchDoubleReg);
      break;
    }
    case kIA32I32x4TruncF32x4U: {
      __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
                          i.TempRegister(0), kScratchDoubleReg);
      break;
    }
    case kIA32Cvttps2dq: {
      __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32Cvttpd2dq: {
      __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
      break;
    }
    case kIA32Word32AtomicPairLoad: {
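      // This relies on an aligned 64-bit SSE load being atomic: the pair is
      // read with one movq and then split into the two 32-bit outputs.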
      __ movq(kScratchDoubleReg, i.MemoryOperand());
      __ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
      __ Pextrd(i.OutputRegister(1), kScratchDoubleReg, 1);
      break;
    }
    case kIA32Word32ReleasePairStore: {
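      // Assemble the 64-bit value on the stack (ebx is saved first since it
      // serves as the temporary), load it into an XMM register, and publish
      // it with a single 64-bit movq store; a plain IA-32 store already
      // provides release ordering, as stores are not reordered with earlier
      // memory operations.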
      __ push(ebx);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(1));
      __ push(ebx);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
      __ push(ebx);
      frame_access_state()->IncreaseSPDelta(3);
      __ movq(kScratchDoubleReg, MemOperand(esp, 0));
      __ pop(ebx);
      __ pop(ebx);
      __ pop(ebx);
      frame_access_state()->IncreaseSPDelta(-3);
      __ movq(i.MemoryOperand(2), kScratchDoubleReg);
      break;
    }
    case kIA32Word32SeqCstPairStore: {
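      // Sequentially consistent 64-bit stores use a cmpxchg8b retry loop:
      // read the current value into edx:eax, stage the new low word in ebx
      // (saved and restored around the attempt), and retry until the
      // exchange succeeds.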
      Label store;
      __ bind(&store);
      __ mov(eax, i.MemoryOperand(2));
      __ mov(edx, i.NextMemoryOperand(2));
      __ push(ebx);
      frame_access_state()->IncreaseSPDelta(1);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
      __ lock();
      __ cmpxchg8b(i.MemoryOperand(2));
      __ pop(ebx);
      frame_access_state()->IncreaseSPDelta(-1);
      __ j(not_equal, &store);
      break;
    }
    case kAtomicExchangeInt8: {
      __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
      __ movsx_b(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kAtomicExchangeUint8: {
      __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
      __ movzx_b(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kAtomicExchangeInt16: {
      __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
      __ movsx_w(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kAtomicExchangeUint16: {
      __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
      __ movzx_w(i.InputRegister(0), i.InputRegister(0));
      break;
    }
    case kAtomicExchangeWord32: {
      __ xchg(i.InputRegister(0), i.MemoryOperand(1));
      break;
    }
    case kIA32Word32AtomicPairExchange: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
      Label exchange;
      __ bind(&exchange);
      __ mov(eax, i.MemoryOperand(2));
      __ mov(edx, i.NextMemoryOperand(2));
      __ push(ebx);
      frame_access_state()->IncreaseSPDelta(1);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
      __ lock();
      __ cmpxchg8b(i.MemoryOperand(2));
      __ pop(ebx);
      frame_access_state()->IncreaseSPDelta(-1);
      __ j(not_equal, &exchange);
      break;
    }
    case kAtomicCompareExchangeInt8: {
      __ lock();
      __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
      __ movsx_b(eax, eax);
      break;
    }
    case kAtomicCompareExchangeUint8: {
      __ lock();
      __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
      __ movzx_b(eax, eax);
      break;
    }
    case kAtomicCompareExchangeInt16: {
      __ lock();
      __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
      __ movsx_w(eax, eax);
      break;
    }
    case kAtomicCompareExchangeUint16: {
      __ lock();
      __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
      __ movzx_w(eax, eax);
      break;
    }
    case kAtomicCompareExchangeWord32: {
      __ lock();
      __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
      break;
    }
    case kIA32Word32AtomicPairCompareExchange: {
      __ push(ebx);
      frame_access_state()->IncreaseSPDelta(1);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
      __ lock();
      __ cmpxchg8b(i.MemoryOperand(4));
      __ pop(ebx);
      frame_access_state()->IncreaseSPDelta(-1);
      break;
    }
#define ATOMIC_BINOP_CASE(op, inst)                \
  case kAtomic##op##Int8: {                        \
    ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
    __ movsx_b(eax, eax);                          \
    break;                                         \
  }                                                \
  case kAtomic##op##Uint8: {                       \
    ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
    __ movzx_b(eax, eax);                          \
    break;                                         \
  }                                                \
  case kAtomic##op##Int16: {                       \
    ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
    __ movsx_w(eax, eax);                          \
    break;                                         \
  }                                                \
  case kAtomic##op##Uint16: {                      \
    ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
    __ movzx_w(eax, eax);                          \
    break;                                         \
  }                                                \
  case kAtomic##op##Word32: {                      \
    ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg);     \
    break;                                         \
  }
      ATOMIC_BINOP_CASE(Add, add)
      ATOMIC_BINOP_CASE(Sub, sub)
      ATOMIC_BINOP_CASE(And, and_)
      ATOMIC_BINOP_CASE(Or, or_)
      ATOMIC_BINOP_CASE(Xor, xor_)
#undef ATOMIC_BINOP_CASE
#define ATOMIC_BINOP_CASE(op, instr1, instr2)         \
  case kIA32Word32AtomicPair##op: {                   \
    DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
    ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)          \
    break;                                            \
  }
      ATOMIC_BINOP_CASE(Add, add, adc)
      ATOMIC_BINOP_CASE(And, and_, and_)
      ATOMIC_BINOP_CASE(Or, or_, or_)
      ATOMIC_BINOP_CASE(Xor, xor_, xor_)
#undef ATOMIC_BINOP_CASE
    case kIA32Word32AtomicPairSub: {
      DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
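      // lock cmpxchg8b is the only 64-bit read-modify-write primitive on
      // IA-32, so subtraction is implemented as a retry loop that adds the
      // two's-complement negation of the operand.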
      Label binop;
      __ bind(&binop);
      // Move memory operand into edx:eax
      __ mov(eax, i.MemoryOperand(2));
      __ mov(edx, i.NextMemoryOperand(2));
      // Save input registers temporarily on the stack.
      __ push(ebx);
      frame_access_state()->IncreaseSPDelta(1);
      i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
      __ push(i.InputRegister(1));
      // Negate input in place
      __ neg(ebx);
      __ adc(i.InputRegister(1), 0);
      __ neg(i.InputRegister(1));
      // Add memory operand, negated input.
      __ add(ebx, eax);
      __ adc(i.InputRegister(1), edx);
      __ lock();
      __ cmpxchg8b(i.MemoryOperand(2));
      // Restore input registers
      __ pop(i.InputRegister(1));
      __ pop(ebx);
      frame_access_state()->IncreaseSPDelta(-1);
      __ j(not_equal, &binop);
      break;
    }
    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      UNREACHABLE();  // Won't be generated by instruction selector.
  }
  return kSuccess;
}

static Condition FlagsConditionToCondition(FlagsCondition condition) {
  switch (condition) {
    case kUnorderedEqual:
    case kEqual:
      return equal;
    case kUnorderedNotEqual:
    case kNotEqual:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedGreaterThanOrEqual:
      return greater_equal;
    case kSignedLessThanOrEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kUnsignedLessThan:
      return below;
    case kUnsignedGreaterThanOrEqual:
      return above_equal;
    case kUnsignedLessThanOrEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kOverflow:
      return overflow;
    case kNotOverflow:
      return no_overflow;
    default:
      UNREACHABLE();
  }
}

// Assembles a branch after an instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
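  // For floating-point comparisons, PF is set when the operands are
  // unordered (NaN); route that case to the appropriate label before testing
  // the main condition.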
  if (branch->condition == kUnorderedEqual) {
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  // Add a jump if not falling through to the next block.
  if (!branch->fallthru) __ jmp(flabel);
}

void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}

void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
    RpoNumber target) {
  __ jmp(GetLabel(target));
}

#if V8_ENABLE_WEBASSEMBLY
void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      IA32OperandConverter i(gen_, instr_);
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        __ PrepareCallCFunction(0, esi);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        size_t pop_size =
            call_descriptor->ParameterSlotCount() * kSystemPointerSize;
        // Use ecx as a scratch register; we return immediately anyway.
        __ Ret(static_cast<int>(pop_size), ecx);
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched when the code
        // is added to the native module and copied into wasm code space.
        __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        ReferenceMap* reference_map =
            gen_->zone()->New<ReferenceMap>(gen_->zone());
        gen_->RecordSafepoint(reference_map);
        __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = zone()->New<OutOfLineTrap>(this, instr);
  Label* tlabel = ool->entry();
  Label end;
  if (condition == kUnorderedEqual) {
    __ j(parity_even, &end, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(condition), tlabel);
  __ bind(&end);
}
#endif  // V8_ENABLE_WEBASSEMBLY

// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  IA32OperandConverter i(this, instr);
  Label done;

  // Materialize a full 32-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  Label check;
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
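  // As in AssembleArchBranch, PF distinguishes the unordered (NaN) case for
  // floating-point comparisons; materialize its fixed result directly and
  // skip the generic path.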
  if (condition == kUnorderedEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ mov(reg, Immediate(1));
    __ jmp(&done, Label::kNear);
  }
  Condition cc = FlagsConditionToCondition(condition);

  __ bind(&check);
  if (reg.is_byte_register()) {
    // setcc for byte registers (al, bl, cl, dl).
    __ setcc(cc, reg);
    __ movzx_b(reg, reg);
  } else {
    // Emit a branch to set a register to either 1 or 0.
    Label set;
    __ j(cc, &set, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
    __ bind(&set);
    __ mov(reg, Immediate(1));
  }
  __ bind(&done);
}

void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  std::vector<std::pair<int32_t, Label*>> cases;
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
  }
  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
                                      cases.data() + cases.size());
}

void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  size_t const case_count = instr->InputCount() - 2;
  Label** cases = zone()->NewArray<Label*>(case_count);
  for (size_t index = 0; index < case_count; ++index) {
    cases[index] = GetLabel(i.InputRpo(index + 2));
  }
  Label* const table = AddJumpTable(cases, case_count);
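  // Bounds check: indices at or above case_count jump to the default block
  // (input 1); everything else dispatches through the jump table.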
  __ cmp(input, Immediate(case_count));
  __ j(above_equal, GetLabel(i.InputRpo(1)));
  __ jmp(Operand::JumpTable(input, times_system_pointer_size, table));
}

void CodeGenerator::AssembleArchSelect(Instruction* instr,
                                       FlagsCondition condition) {
  UNIMPLEMENTED();
}

// The calling convention for JSFunctions on IA32 passes arguments on the
// stack and the JSFunction and context in EDI and ESI, respectively, thus
// the steps of the call look as follows:

// --{ before the call instruction }--------------------------------------------
//                                                         | caller frame |
//                                                         ^ esp          ^ ebp

// --{ push arguments and setup ESI, EDI }--------------------------------------
//                                       | args + receiver | caller frame |
//                                       ^ esp                            ^ ebp
//                                       [edi = JSFunction, esi = context]

// --{ call [edi + kCodeEntryOffset] }------------------------------------------
//                                 | RET | args + receiver | caller frame |
//                                 ^ esp                                  ^ ebp

// =={ prologue of called function }============================================
// --{ push ebp }---------------------------------------------------------------
//                            | FP | RET | args + receiver | caller frame |
//                            ^ esp                                       ^ ebp

// --{ mov ebp, esp }-----------------------------------------------------------
//                            | FP | RET | args + receiver | caller frame |
//                            ^ ebp,esp

// --{ push esi }---------------------------------------------------------------
//                      | CTX | FP | RET | args + receiver | caller frame |
//                      ^ esp ^ ebp

// --{ push edi }---------------------------------------------------------------
//                | FNC | CTX | FP | RET | args + receiver | caller frame |
//                ^ esp       ^ ebp

// --{ subi esp, #N }-----------------------------------------------------------
// | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
// ^ esp                      ^ ebp

// =={ body of called function }================================================

// =={ epilogue of called function }============================================
// --{ mov esp, ebp }-----------------------------------------------------------
//                            | FP | RET | args + receiver | caller frame |
//                            ^ esp,ebp

// --{ pop ebp }-----------------------------------------------------------
//                                 | RET | args + receiver | caller frame |
//                                 ^ esp                                  ^ ebp

// --{ ret #A+1 }-----------------------------------------------------------
//                                                         | caller frame |
//                                                         ^ esp          ^ ebp

// Runtime function calls are accomplished by doing a stub call to the
// CEntry (a real code object). On IA32 this passes arguments on the
// stack, the number of arguments in EAX, the address of the runtime function
// in EBX, and the context in ESI.

// --{ before the call instruction }--------------------------------------------
//                                                         | caller frame |
//                                                         ^ esp          ^ ebp

// --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
//                                       | args + receiver | caller frame |
//                                       ^ esp                            ^ ebp
//                                       [eax = #args, ebx = runtime function,
//                                        esi = context]

// --{ call #CEntry }-----------------------------------------------------------
//                                 | RET | args + receiver | caller frame |
//                                 ^ esp                                  ^ ebp

// =={ body of runtime function }===============================================

// --{ runtime returns }--------------------------------------------------------
//                                                         | caller frame |
//                                                         ^ esp          ^ ebp

// Other custom linkages (e.g. for calling directly into and out of C++) may
// need to save callee-saved registers on the stack, which is done in the
// function prologue of generated code.

// --{ before the call instruction }--------------------------------------------
//                                                         | caller frame |
//                                                         ^ esp          ^ ebp

// --{ set up arguments in registers on stack }---------------------------------
//                                                  | args | caller frame |
//                                                  ^ esp                 ^ ebp
//                                                  [r0 = arg0, r1 = arg1, ...]

// --{ call code }--------------------------------------------------------------
//                                            | RET | args | caller frame |
//                                            ^ esp                       ^ ebp

// =={ prologue of called function }============================================
// --{ push ebp }---------------------------------------------------------------
//                                       | FP | RET | args | caller frame |
//                                       ^ esp                            ^ ebp

// --{ mov ebp, esp }-----------------------------------------------------------
//                                       | FP | RET | args | caller frame |
//                                       ^ ebp,esp

// --{ save registers }---------------------------------------------------------
//                                | regs | FP | RET | args | caller frame |
//                                ^ esp  ^ ebp

// --{ subi esp, #N }-----------------------------------------------------------
//                | callee frame | regs | FP | RET | args | caller frame |
//                ^ esp                  ^ ebp

// =={ body of called function }================================================

// =={ epilogue of called function }============================================
// --{ restore registers }------------------------------------------------------
//                                | regs | FP | RET | args | caller frame |
//                                ^ esp  ^ ebp

// --{ mov esp, ebp }-----------------------------------------------------------
//                                       | FP | RET | args | caller frame |
//                                       ^ esp,ebp

// --{ pop ebp }----------------------------------------------------------------
//                                            | RET | args | caller frame |
//                                            ^ esp                       ^ ebp

void CodeGenerator::FinishFrame(Frame* frame) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (!saves.is_empty()) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    frame->AllocateSavedCalleeRegisterSlots(saves.Count());
  }
}

void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
      __ push(ebp);
      __ mov(ebp, esp);
#if V8_ENABLE_WEBASSEMBLY
      if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
        __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
        // Reserve stack space for saving the c_entry_fp later.
        __ AllocateStackSpace(kSystemPointerSize);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
#if V8_ENABLE_WEBASSEMBLY
      if (call_descriptor->IsWasmFunctionCall() ||
          call_descriptor->IsWasmImportWrapper() ||
          call_descriptor->IsWasmCapiFunction()) {
        __ push(kWasmInstanceRegister);
      }
      if (call_descriptor->IsWasmCapiFunction()) {
        // Reserve space for saving the PC later.
        __ AllocateStackSpace(kSystemPointerSize);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
    }
  }

  int required_slots =
      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    required_slots -= osr_helper()->UnoptimizedFrameSlots();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (required_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
#if V8_ENABLE_WEBASSEMBLY
    if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
        Register scratch = esi;
        __ push(scratch);
        __ mov(scratch,
               FieldOperand(kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ mov(scratch, Operand(scratch, 0));
        __ add(scratch, Immediate(required_slots * kSystemPointerSize));
        __ cmp(esp, scratch);
        __ pop(scratch);
        __ j(above_equal, &done, Label::kNear);
      }

      __ wasm_call(wasm::WasmCode::kWasmStackOverflow,
                   RelocInfo::WASM_STUB_CALL);
      // The call does not return, hence we can ignore any references and just
      // define an empty safepoint.
      ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
      RecordSafepoint(reference_map);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    // Skip callee-saved and return slots, which are created below.
    required_slots -= saves.Count();
    required_slots -= frame()->GetReturnSlotCount();
    if (required_slots > 0) {
      __ AllocateStackSpace(required_slots * kSystemPointerSize);
    }
  }

  if (!saves.is_empty()) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    for (Register reg : base::Reversed(saves)) {
      __ push(reg);
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
  }
}

void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  // Restore registers.
  if (!saves.is_empty()) {
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ add(esp, Immediate(returns * kSystemPointerSize));
    }
    for (Register reg : saves) {
      __ pop(reg);
    }
  }

  IA32OperandConverter g(this, nullptr);
  int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());

  // {additional_pop_count} is only greater than zero if {parameter_slots} ==
  // 0. Check RawMachineAssembler::PopAndReturn.
  if (parameter_slots != 0) {
    if (additional_pop_count->IsImmediate()) {
      DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
    } else if (FLAG_debug_code) {
      __ cmp(g.ToRegister(additional_pop_count), Immediate(0));
      __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
    }
  }

  Register argc_reg = ecx;
  // Functions with JS linkage have at least one parameter (the receiver).
  // If {parameter_slots} == 0, it means it is a builtin with
  // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
  // itself.

  const bool drop_jsargs = parameter_slots != 0 &&
                           frame_access_state()->has_frame() &&
                           call_descriptor->IsJSFunctionCall();
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now if they always have the same
    // number of return args.
    if (additional_pop_count->IsImmediate() &&
        g.ToConstant(additional_pop_count).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
      }
    }
    if (drop_jsargs) {
      // Get the actual argument count.
      __ mov(argc_reg, Operand(ebp, StandardFrameConstants::kArgCOffset));
      DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
    }
    AssembleDeconstructFrame();
  }

  if (drop_jsargs) {
    // We must pop all arguments from the stack (including the receiver).
    // The number of arguments without the receiver is
    // max(argc_reg, parameter_slots-1), and the receiver is added in
    // DropArguments().
    Label mismatch_return;
    Register scratch_reg = edx;
    DCHECK_NE(argc_reg, scratch_reg);
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
    __ cmp(argc_reg, Immediate(parameter_slots));
    __ j(greater, &mismatch_return, Label::kNear);
    __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
    __ bind(&mismatch_return);
    __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
                     TurboAssembler::kCountIncludesReceiver);
    // We use a return instead of a jump for better return address prediction.
    __ Ret();
  } else if (additional_pop_count->IsImmediate()) {
    int additional_count = g.ToConstant(additional_pop_count).ToInt32();
    size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
    if (is_uint16(pop_size)) {
      // Avoid the additional scratch register; it might clobber the
      // CalleeSavedRegisters.
      __ ret(static_cast<int>(pop_size));
    } else {
      Register scratch_reg = ecx;
      DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
      CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
      __ Ret(static_cast<int>(pop_size), scratch_reg);
    }
  } else {
    Register pop_reg = g.ToRegister(additional_pop_count);
    Register scratch_reg = pop_reg == ecx ? edx : ecx;
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
    DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
    int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
    __ PopReturnAddressTo(scratch_reg);
    __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size,
                        static_cast<int>(pop_size)));
    __ PushReturnAddressFrom(scratch_reg);
    __ Ret();
  }
}

void CodeGenerator::FinishCode() {}

void CodeGenerator::PrepareForDeoptimizationExits(
    ZoneDeque<DeoptimizationExit*>* exits) {}

void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ mov(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ mov(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
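      // General-purpose stack-to-stack moves go through push/pop, which needs
      // no scratch register; FP values are copied via kScratchDoubleReg.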
      if (source->IsStackSlot()) {
        __ push(src);
        __ pop(dst);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, src);
          __ Movss(dst, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        Register dst = g.ToRegister(destination);
        if (src.type() == Constant::kHeapObject) {
          __ Move(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, src.ToFloat32AsInt());
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        __ Move(dst, g.ToImmediate(source));
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ Move(dst, Immediate(src.ToFloat32AsInt()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          uint64_t constant_value = src.ToFloat64().AsUint64();
          uint32_t lower = static_cast<uint32_t>(constant_value);
          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
          Operand dst0 = dst;
          Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
          __ Move(dst0, Immediate(lower));
          __ Move(dst1, Immediate(upper));
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
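        // Swap the two registers through the stack so no extra
        // general-purpose scratch register is needed.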
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ push(src);
        __ mov(src, dst);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movaps(kScratchDoubleReg, src);
        __ Movaps(src, dst);
        __ Movaps(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        __ push(src);
        frame_access_state()->IncreaseSPDelta(1);
        Operand dst = g.ToOperand(destination);
        __ mov(src, dst);
        frame_access_state()->IncreaseSPDelta(-1);
        dst = g.ToOperand(destination);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, dst);
          __ Movss(dst, src);
          __ Movaps(src, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, dst);
          __ Movsd(dst, src);
          __ Movaps(src, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, dst);
          __ Movups(dst, src);
          __ Movups(src, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      if (source->IsStackSlot()) {
        Operand dst1 = g.ToOperand(destination);
        __ push(dst1);
        frame_access_state()->IncreaseSPDelta(1);
        Operand src1 = g.ToOperand(source);
        __ push(src1);
        Operand dst2 = g.ToOperand(destination);
        __ pop(dst2);
        frame_access_state()->IncreaseSPDelta(-1);
        Operand src2 = g.ToOperand(source);
        __ pop(src2);
      } else {
        DCHECK(source->IsFPStackSlot());
        Operand src0 = g.ToOperand(source);
        Operand dst0 = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ Movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ Movss(src0, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ Movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ Movsd(src0, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ Movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ push(g.ToOperand(source, 2 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
          __ push(g.ToOperand(source, 3 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
          __ Movups(src0, kScratchDoubleReg);
        }
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
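  // Each jump-table entry is the absolute 32-bit address of a target label;
  // AssembleArchTableSwitch indexes this table via Operand::JumpTable.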
  for (size_t index = 0; index < target_count; ++index) {
    __ dd(targets[index]);
  }
}

#undef __
#undef kScratchDoubleReg
#undef ASSEMBLE_COMPARE
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_BINOP
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_I64ATOMIC_BINOP
#undef ASSEMBLE_MOVX
#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
#undef ASSEMBLE_SIMD_IMM_SHUFFLE
#undef ASSEMBLE_SIMD_ALL_TRUE
#undef ASSEMBLE_SIMD_SHIFT
#undef ASSEMBLE_SIMD_PINSR

}  // namespace compiler
}  // namespace internal
}  // namespace v8