// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/code-generator.h"

#include "src/assembler-inl.h"
#include "src/callable.h"
#include "src/compiler/code-generator-impl.h"
#include "src/compiler/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/frame-constants.h"
#include "src/frames.h"
#include "src/heap/heap-inl.h"
#include "src/ia32/assembler-ia32.h"
#include "src/ia32/macro-assembler-ia32.h"
#include "src/optimized-compilation-info.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

#define kScratchDoubleReg xmm0


// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
 public:
  IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    if (op->IsRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToRegister(op));
    } else if (op->IsFPRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToDoubleRegister(op));
    }
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return Operand(offset.from_stack_pointer() ? esp : ebp,
                   offset.offset() + extra);
  }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kInt32 &&
        RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(static_cast<Address>(constant.ToInt32()),
                       constant.rmode());
    }
    switch (constant.type()) {
      case Constant::kInt32:
        return Immediate(constant.ToInt32());
      case Constant::kFloat32:
        return Immediate::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Immediate::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Immediate(constant.ToExternalReference());
      case Constant::kHeapObject:
        return Immediate(constant.ToHeapObject());
      case Constant::kInt64:
        break;
      case Constant::kRpoNumber:
        return Immediate::CodeRelativeOffset(ToLabel(operand));
    }
    UNREACHABLE();
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

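  // Decodes the addressing mode encoded in the instruction opcode into an
  // assembler Operand, consuming base/index/displacement inputs starting at
  // *offset. For example, kMode_MR4I consumes a base register, an index
  // register (scaled by 4), and an immediate displacement.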
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_M1:
      case kMode_M2:
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MI: {
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(ctant.ToInt32(), ctant.rmode());
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }

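  // Like MemoryOperand, but addresses the upper half of an 8-byte location:
  // the fixed displacement of 4 selects the high 32-bit word. This is used by
  // the 64-bit atomic sequences below, which operate on low/high register
  // pairs.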
  Operand NextMemoryOperand(size_t offset = 0) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    Register base = InputRegister(NextOffset(&offset));
    const int32_t disp = 4;
    if (mode == kMode_MR1) {
      Register index = InputRegister(NextOffset(&offset));
      ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
      return Operand(base, index, scale, disp);
    } else if (mode == kMode_MRI) {
      Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
      return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
    } else {
      UNREACHABLE();
    }
  }
};


namespace {

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

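// Out-of-line code that materializes a quiet NaN in the result register by
// computing 0.0 / 0.0. Used as the slow path when a float min/max comparison
// observes a NaN input.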
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

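// Slow path for double-to-int32 truncation, taken when cvttsd2si overflows:
// spills the input to the stack and calls the DoubleToI builtin (or the
// corresponding wasm runtime stub) to perform the conversion out of line.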
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    __ sub(esp, Immediate(kDoubleSize));
    __ movsd(MemOperand(esp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched at relocation.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  Isolate* isolate_;
  Zone* zone_;
};

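// Out-of-line portion of kArchStoreWithWriteBarrier: skips the stub call when
// the stored value is a smi or lives on a page that is not marked as
// interesting, and otherwise computes the slot address and calls the
// RecordWrite stub.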
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        zone_(gen->zone()) {}

  void SaveRegisters(RegList registers) {
    DCHECK_LT(0, NumRegs(registers));
    for (int i = 0; i < Register::kNumRegisters; ++i) {
      if ((registers >> i) & 1u) {
        __ push(Register::from_code(i));
      }
    }
  }

  void RestoreRegisters(RegList registers) {
    DCHECK_LT(0, NumRegs(registers));
    for (int i = Register::kNumRegisters - 1; i >= 0; --i) {
      if ((registers >> i) & 1u) {
        __ pop(Register::from_code(i));
      }
    }
  }

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                           save_fp_mode);
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  Zone* zone_;
};

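// If one of the call instruction's inputs was allocated to
// kSpeculationPoisonRegister, reload that input's value into the poison
// register right before the call; input 1 of the call holds the index of the
// aliasing input, or -1 if there is none.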
void MoveOperandIfAliasedWithPoisonRegister(Instruction* call_instruction,
                                            CodeGenerator* gen) {
  IA32OperandConverter i(gen, call_instruction);
  int const poison_index = i.InputInt32(1);
  if (poison_index == -1) {
    // No aliasing -> nothing to move.
    return;
  }

  InstructionOperand* op = call_instruction->InputAt(poison_index);
  if (op->IsImmediate() || op->IsConstant()) {
    gen->tasm()->mov(kSpeculationPoisonRegister, i.ToImmediate(op));
  } else {
    gen->tasm()->mov(kSpeculationPoisonRegister, i.InputOperand(poison_index));
  }
}

void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   IA32OperandConverter& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    codegen->tasm()->and_(value, kSpeculationPoisonRegister);
  }
}

}  // namespace

#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 0;                                               \
      Operand left = i.MemoryOperand(&index);                         \
      if (HasImmediateInput(instr, index)) {                          \
        __ asm_instr(left, i.InputImmediate(index));                  \
      } else {                                                        \
        __ asm_instr(left, i.InputRegister(index));                   \
      }                                                               \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (0)

#define ASSEMBLE_IEEE754_BINOP(name)                                    \
  do {                                                                  \
    /* Pass two doubles as arguments on the stack. */                   \
    __ PrepareCallCFunction(4, eax);                                    \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));  \
    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));  \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* Return value is in st(0) on ia32. */                             \
    /* Store it into the result register. */                            \
    __ sub(esp, Immediate(kDoubleSize));                                \
    __ fstp_d(Operand(esp, 0));                                         \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                \
    __ add(esp, Immediate(kDoubleSize));                                \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                     \
  do {                                                                  \
    /* Pass one double as argument on the stack. */                     \
    __ PrepareCallCFunction(2, eax);                                    \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));  \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* Return value is in st(0) on ia32. */                             \
    /* Store it into the result register. */                            \
    __ sub(esp, Immediate(kDoubleSize));                                \
    __ fstp_d(Operand(esp, 0));                                         \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                \
    __ add(esp, Immediate(kDoubleSize));                                \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 1;                                               \
      Operand right = i.MemoryOperand(&index);                        \
      __ asm_instr(i.InputRegister(0), right);                        \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1));         \
      } else {                                                        \
        __ asm_instr(i.InputRegister(0), i.InputOperand(1));          \
      }                                                               \
    }                                                                 \
  } while (0)

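// Emits a compare-and-swap retry loop for a 32-bit (or narrower) atomic
// read-modify-write: load the old value into eax, apply the operation on a
// temp register, then lock cmpxchg; if another thread modified the location
// in the meantime, cmpxchg fails and the loop retries.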
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(eax, i.MemoryOperand(1));                       \
    __ Move(i.TempRegister(0), eax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

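// 64-bit variant built on cmpxchg8b, which compares edx:eax against the
// 8-byte location and, on match, stores ecx:ebx. The old value is loaded into
// the output register pair, the new value is computed into the input pair
// (preserved via push/pop), and the loop retries until no concurrent
// modification is observed.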
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)        \
  do {                                                  \
    Label binop;                                        \
    __ bind(&binop);                                    \
    __ mov(i.OutputRegister(0), i.MemoryOperand(2));    \
    __ mov(i.OutputRegister(1), i.NextMemoryOperand(2)); \
    __ push(i.InputRegister(0));                        \
    __ push(i.InputRegister(1));                        \
    __ instr1(i.InputRegister(0), i.OutputRegister(0)); \
    __ instr2(i.InputRegister(1), i.OutputRegister(1)); \
    __ lock();                                          \
    __ cmpxchg8b(i.MemoryOperand(2));                   \
    __ pop(i.InputRegister(1));                         \
    __ pop(i.InputRegister(0));                         \
    __ j(not_equal, &binop);                            \
  } while (false)

#define ASSEMBLE_MOVX(mov_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (0)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
  do {                                                               \
    XMMRegister src0 = i.InputSimd128Register(0);                    \
    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    if (CpuFeatures::IsSupported(AVX)) {                             \
      CpuFeatureScope avx_scope(tasm(), AVX);                        \
      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
    } else {                                                         \
      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
      __ opcode(i.OutputSimd128Register(), src1);                    \
    }                                                                \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  if (CpuFeatures::IsSupported(AVX)) {                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                 i.InputOperand(1), imm);                              \
  } else {                                                             \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
  }

void CodeGenerator::AssembleDeconstructFrame() {
  __ mov(esp, ebp);
  __ pop(ebp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ mov(ebp, MemOperand(ebp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register, Register,
                                                     Register) {
  // There are not enough temp registers left on ia32 for a call instruction
  // so we pick some scratch registers and save/restore them manually here.
  int scratch_count = 3;
  Register scratch1 = ebx;
  Register scratch2 = ecx;
  Register scratch3 = edx;
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
         Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  __ push(scratch1);
  __ push(scratch2);
  __ push(scratch3);

  // Load arguments count from current arguments adaptor frame (note that it
  // does not include the receiver).
  Register caller_args_count_reg = scratch1;
  __ mov(caller_args_count_reg,
         Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3, scratch_count);
  __ pop(scratch3);
  __ pop(scratch2);
  __ pop(scratch1);

  __ bind(&done);
}

namespace {

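// Adjusts esp so that exactly new_slot_above_sp slots separate the stack
// pointer from the fixed part of the frame, and keeps the FrameAccessState's
// SP delta in sync with the adjustment.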
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    tasm->sub(esp, Immediate(stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->add(esp, Immediate(-stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    IA32OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ ComputeCodeStartAddress(eax);
  __ cmp(eax, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
  __ pop(eax);  // Restore eax.
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ mov(ebx, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(ebx, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // Ensure we're not serializing (otherwise we'd need to use an indirection
  // to access the builtin below).
  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
  Handle<Code> code = isolate()->builtins()->builtin_handle(
      Builtins::kCompileLazyDeoptimizedCode);
  __ j(not_zero, code, RelocInfo::CODE_TARGET);
}

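// Computes the speculation poison mask from the code-start register: all bits
// set when the expected and actual code start addresses match, all bits
// cleared when we are speculatively executing with the wrong target. The mask
// is later and-ed into loaded values and key registers (see
// kArchWordPoisonOnSpeculation below) to neutralize mispredicted execution.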
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  __ push(eax);  // Push eax so we can use it as a scratch register.

  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(eax);
  __ mov(kSpeculationPoisonRegister, Immediate(0));
  __ cmp(kJavaScriptCallCodeStartRegister, eax);
  __ mov(eax, Immediate(-1));
  __ cmov(equal, kSpeculationPoisonRegister, eax);

  __ pop(eax);  // Restore eax.
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ and_(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ and_(kContextRegister, kSpeculationPoisonRegister);
  __ and_(esp, kSpeculationPoisonRegister);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  IA32OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ wasm_call(wasm_code, constant.rmode());
        } else {
          if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
            __ RetpolineCall(wasm_code, constant.rmode());
          } else {
            __ call(wasm_code, constant.rmode());
          }
        }
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         no_reg, no_reg, no_reg);
      }
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ jmp(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ jmp(wasm_code, constant.rmode());
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
        __ RetpolineJump(reg);
      } else {
        __ jmp(reg);
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
      __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
      __ add(ecx, Immediate(Code::kHeaderSize - kHeapObjectTag));
      __ call(ecx);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK_EQ(0, bytes % kPointerSize);
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      int const num_parameters = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be more strict alignment requirement, causing old SP
      // to be saved on the stack. In those cases, we can not calculate the SP
      // delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchLookupSwitch:
      AssembleArchLookupSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
      break;
    case kArchDebugAbort:
      DCHECK(i.InputRegister(0) == edx);
      if (!frame_access_state()->has_frame()) {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      } else {
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      }
      __ int3();
      break;
    case kArchDebugBreak:
      __ int3();
      break;
    case kArchNop:
    case kArchThrowTerminator:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      int deopt_state_id =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      CodeGenResult result =
          AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
      if (result != kSuccess) return result;
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchStackPointer:
      __ mov(i.OutputRegister(), esp);
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), ebp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ mov(i.OutputRegister(), Operand(ebp, 0));
      } else {
        __ mov(i.OutputRegister(), ebp);
      }
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = new (zone()) OutOfLineTruncateDoubleToI(
          this, result, input, DetermineStubCallMode());
      __ cvttsd2si(result, Operand(input));
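      // cvttsd2si produces the "integer indefinite" value 0x80000000 when the
      // input cannot be represented as an int32. Comparing that result
      // against 1 sets the overflow flag exactly for INT_MIN (INT_MIN - 1
      // overflows), so the overflow branch catches the failed conversion.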
      __ cmp(result, 1);
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      auto ool = new (zone()) OutOfLineRecordWrite(this, object, operand,
                                                   value, scratch0, scratch1,
                                                   mode);
      __ mov(operand, value);
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? esp : ebp;
      __ lea(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow: {
      // TODO(bmeurer): Improve integration of the stub.
      if (i.InputDoubleRegister(1) != xmm2) {
        __ movaps(xmm2, i.InputDoubleRegister(0));
        __ movaps(xmm1, i.InputDoubleRegister(1));
      } else {
        __ movaps(xmm0, i.InputDoubleRegister(0));
        __ movaps(xmm1, xmm2);
        __ movaps(xmm2, xmm0);
      }
      __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
      __ movaps(i.OutputDoubleRegister(), xmm3);
      break;
    }
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kIA32Add:
      ASSEMBLE_BINOP(add);
      break;
    case kIA32And:
      ASSEMBLE_BINOP(and_);
      break;
    case kIA32Cmp:
      ASSEMBLE_COMPARE(cmp);
      break;
    case kIA32Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kIA32Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kIA32Test:
      ASSEMBLE_COMPARE(test);
      break;
    case kIA32Test16:
      ASSEMBLE_COMPARE(test_w);
      break;
    case kIA32Test8:
      ASSEMBLE_COMPARE(test_b);
      break;
    case kIA32Imul:
      if (HasImmediateInput(instr, 1)) {
        __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
      } else {
        __ imul(i.OutputRegister(), i.InputOperand(1));
      }
      break;
    case kIA32ImulHigh:
      __ imul(i.InputRegister(1));
      break;
    case kIA32UmulHigh:
      __ mul(i.InputRegister(1));
      break;
    case kIA32Idiv:
      __ cdq();
      __ idiv(i.InputOperand(1));
      break;
    case kIA32Udiv:
      __ Move(edx, Immediate(0));
      __ div(i.InputOperand(1));
      break;
    case kIA32Not:
      __ not_(i.OutputOperand());
      break;
    case kIA32Neg:
      __ neg(i.OutputOperand());
      break;
    case kIA32Or:
      ASSEMBLE_BINOP(or_);
      break;
    case kIA32Xor:
      ASSEMBLE_BINOP(xor_);
      break;
    case kIA32Sub:
      ASSEMBLE_BINOP(sub);
      break;
    case kIA32Shl:
      if (HasImmediateInput(instr, 1)) {
        __ shl(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shl_cl(i.OutputOperand());
      }
      break;
    case kIA32Shr:
      if (HasImmediateInput(instr, 1)) {
        __ shr(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shr_cl(i.OutputOperand());
      }
      break;
    case kIA32Sar:
      if (HasImmediateInput(instr, 1)) {
        __ sar(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ sar_cl(i.OutputOperand());
      }
      break;
    case kIA32AddPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if (i.OutputRegister(0).code() == i.InputRegister(1).code() ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for adc. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ add(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ add(i.OutputRegister(0), i.InputRegister(2));
      }
      if (i.OutputRegister(1).code() != i.InputRegister(1).code()) {
        __ Move(i.OutputRegister(1), i.InputRegister(1));
      }
      __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
    case kIA32SubPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if (i.OutputRegister(0).code() == i.InputRegister(1).code() ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for sbb. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ sub(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ sub(i.OutputRegister(0), i.InputRegister(2));
      }
      if (i.OutputRegister(1).code() != i.InputRegister(1).code()) {
        __ Move(i.OutputRegister(1), i.InputRegister(1));
      }
      __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
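    // 64-bit multiply via 32-bit pieces, relying on the register constraints
    // set up by the instruction selector (out_low in eax, temp in edx):
    //   out_high = right_high * left_low + left_high * right_low
    //              + high_word(left_low * right_low)
    //   out_low  = low_word(left_low * right_low)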
    case kIA32MulPair: {
      __ imul(i.OutputRegister(1), i.InputOperand(0));
      __ mov(i.TempRegister(0), i.InputOperand(1));
      __ imul(i.TempRegister(0), i.InputOperand(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));
      __ mov(i.OutputRegister(0), i.InputOperand(0));
      // Multiplies the low words; mul stores the 64-bit product in edx:eax.
      __ mul(i.InputRegister(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));

      break;
    }
    case kIA32ShlPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32ShrPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32SarPair:
      if (HasImmediateInput(instr, 2)) {
        __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32Ror:
      if (HasImmediateInput(instr, 1)) {
        __ ror(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ ror_cl(i.OutputOperand());
      }
      break;
    case kIA32Lzcnt:
      __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Tzcnt:
      __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Popcnt:
      __ Popcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Bswap:
      __ bswap(i.OutputRegister());
      break;
    case kArchWordPoisonOnSpeculation:
      DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
      __ and_(i.InputRegister(0), kSpeculationPoisonRegister);
      break;
    case kLFence:
      __ lfence();
      break;
    case kSSEFloat32Cmp:
      __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Add:
      __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Sub:
      __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Mul:
      __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Div:
      __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat32Sqrt:
      __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kSSEFloat32Abs: {
      // TODO(bmeurer): Use 128-bit constants.
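      // pcmpeqd sets the scratch register to all ones; shifting each 64-bit
      // lane logically right by 33 leaves 0x000000007FFFFFFF per lane, which
      // clears the float32 sign bit when and-ed with the value.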
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrlq(kScratchDoubleReg, 33);
      __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Neg: {
      // TODO(bmeurer): Use 128-bit constants.
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psllq(kScratchDoubleReg, 31);
      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kSSEFloat64Cmp:
      __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Add:
      __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Sub:
      __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Mul:
      __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Div:
      __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
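    // The scalar min/max cases below implement JS Math.min/max semantics: a
    // NaN input sends control to the out-of-line quiet-NaN path, and signed
    // zeros are disambiguated by testing the sign bit via movmsk (so, e.g.,
    // Max(-0, +0) is +0).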
    case kSSEFloat32Max: {
      Label compare_nan, compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }

    case kSSEFloat64Max: {
      Label compare_nan, compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat32Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(kScratchDoubleReg, i.InputOperand(1));
        __ movmskps(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(kScratchDoubleReg, i.InputOperand(1));
        __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Mod: {
      // TODO(dcarney): alignment is wrong.
      __ sub(esp, Immediate(kDoubleSize));
      // Move values to st(0) and st(1).
      __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(esp, 0));
      __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(esp, 0));
      // Loop while fprem isn't done.
      Label mod_loop;
      __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating point control word is set to ignore them all.
      __ fprem();
      // The following 2 instructions implicitly use eax.
      __ fnstsw_ax();
      __ sahf();
      __ j(parity_even, &mod_loop);
      // Move output to stack and clean up.
      __ fstp(1);
      __ fstp_d(Operand(esp, 0));
      __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
      __ add(esp, Immediate(kDoubleSize));
      break;
    }
1396 case kSSEFloat64Abs: {
1397 // TODO(bmeurer): Use 128-bit constants.
1398 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1399 __ psrlq(kScratchDoubleReg, 1);
1400 __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1401 break;
1402 }
1403 case kSSEFloat64Neg: {
1404 // TODO(bmeurer): Use 128-bit constants.
1405 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1406 __ psllq(kScratchDoubleReg, 63);
1407 __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1408 break;
1409 }
1410 case kSSEFloat64Sqrt:
1411 __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1412 break;
1413 case kSSEFloat64Round: {
1414 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1415 RoundingMode const mode =
1416 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1417 __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1418 break;
1419 }
1420 case kSSEFloat32ToFloat64:
1421 __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1422 break;
1423 case kSSEFloat64ToFloat32:
1424 __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1425 break;
1426 case kSSEFloat32ToInt32:
1427 __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
1428 break;
1429 case kSSEFloat32ToUint32:
1430 __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1431 break;
1432 case kSSEFloat64ToInt32:
1433 __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1434 break;
1435 case kSSEFloat64ToUint32:
1436 __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1437 break;
1438 case kSSEInt32ToFloat32:
1439 __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1440 break;
1441 case kSSEUint32ToFloat32:
1442 __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1443 i.TempRegister(0));
1444 break;
1445 case kSSEInt32ToFloat64:
1446 __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1447 break;
1448 case kSSEUint32ToFloat64:
1449 __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1450 break;
1451 case kSSEFloat64ExtractLowWord32:
1452 if (instr->InputAt(0)->IsFPStackSlot()) {
1453 __ mov(i.OutputRegister(), i.InputOperand(0));
1454 } else {
1455 __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1456 }
1457 break;
1458 case kSSEFloat64ExtractHighWord32:
1459 if (instr->InputAt(0)->IsFPStackSlot()) {
1460 __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1461 } else {
1462 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1463 }
1464 break;
1465 case kSSEFloat64InsertLowWord32:
1466 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true);
1467 break;
1468 case kSSEFloat64InsertHighWord32:
1469 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true);
1470 break;
1471 case kSSEFloat64LoadLowWord32:
1472 __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
1473 break;
1474 case kAVXFloat32Add: {
1475 CpuFeatureScope avx_scope(tasm(), AVX);
1476 __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1477 i.InputOperand(1));
1478 break;
1479 }
1480 case kAVXFloat32Sub: {
1481 CpuFeatureScope avx_scope(tasm(), AVX);
1482 __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1483 i.InputOperand(1));
1484 break;
1485 }
1486 case kAVXFloat32Mul: {
1487 CpuFeatureScope avx_scope(tasm(), AVX);
1488 __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1489 i.InputOperand(1));
1490 break;
1491 }
1492 case kAVXFloat32Div: {
1493 CpuFeatureScope avx_scope(tasm(), AVX);
1494 __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1495 i.InputOperand(1));
1496 // Don't delete this mov. It may improve performance on some CPUs,
1497 // when there is a (v)mulss depending on the result.
1498 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1499 break;
1500 }
1501 case kAVXFloat64Add: {
1502 CpuFeatureScope avx_scope(tasm(), AVX);
1503 __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1504 i.InputOperand(1));
1505 break;
1506 }
1507 case kAVXFloat64Sub: {
1508 CpuFeatureScope avx_scope(tasm(), AVX);
1509 __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1510 i.InputOperand(1));
1511 break;
1512 }
1513 case kAVXFloat64Mul: {
1514 CpuFeatureScope avx_scope(tasm(), AVX);
1515 __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1516 i.InputOperand(1));
1517 break;
1518 }
1519 case kAVXFloat64Div: {
1520 CpuFeatureScope avx_scope(tasm(), AVX);
1521 __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1522 i.InputOperand(1));
1523 // Don't delete this mov. It may improve performance on some CPUs,
1524 // when there is a (v)mulsd depending on the result.
1525 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1526 break;
1527 }
1528 case kAVXFloat32Abs: {
1529 // TODO(bmeurer): Use RIP relative 128-bit constants.
1530 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1531 __ psrlq(kScratchDoubleReg, 33);
1532 CpuFeatureScope avx_scope(tasm(), AVX);
1533 __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1534 break;
1535 }
1536 case kAVXFloat32Neg: {
1537 // TODO(bmeurer): Use RIP relative 128-bit constants.
1538 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1539 __ psllq(kScratchDoubleReg, 31);
1540 CpuFeatureScope avx_scope(tasm(), AVX);
1541 __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1542 break;
1543 }
1544 case kAVXFloat64Abs: {
1545 // TODO(bmeurer): Use RIP relative 128-bit constants.
1546 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1547 __ psrlq(kScratchDoubleReg, 1);
1548 CpuFeatureScope avx_scope(tasm(), AVX);
1549 __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1550 break;
1551 }
1552 case kAVXFloat64Neg: {
1553 // TODO(bmeurer): Use RIP relative 128-bit constants.
1554 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1555 __ psllq(kScratchDoubleReg, 63);
1556 CpuFeatureScope avx_scope(tasm(), AVX);
1557 __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1558 break;
1559 }
1560 case kSSEFloat64SilenceNaN:
      __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
      __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
      break;
    case kIA32Movsxbl:
      ASSEMBLE_MOVX(movsx_b);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kIA32Movzxbl:
      ASSEMBLE_MOVX(movzx_b);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kIA32Movb: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_b(operand, i.InputInt8(index));
      } else {
        __ mov_b(operand, i.InputRegister(index));
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    }
    case kIA32Movsxwl:
      ASSEMBLE_MOVX(movsx_w);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kIA32Movzxwl:
      ASSEMBLE_MOVX(movzx_w);
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    case kIA32Movw: {
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      if (HasImmediateInput(instr, index)) {
        __ mov_w(operand, i.InputInt16(index));
      } else {
        __ mov_w(operand, i.InputRegister(index));
      }
      EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      break;
    }
    case kIA32Movl:
      if (instr->HasOutput()) {
        __ mov(i.OutputRegister(), i.MemoryOperand());
        EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        if (HasImmediateInput(instr, index)) {
          __ mov(operand, i.InputImmediate(index));
        } else {
          __ mov(operand, i.InputRegister(index));
        }
      }
      break;
    case kIA32Movsd:
      if (instr->HasOutput()) {
        __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ movsd(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movss:
      if (instr->HasOutput()) {
        __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ movss(operand, i.InputDoubleRegister(index));
      }
      break;
    case kIA32Movdqu:
      if (instr->HasOutput()) {
        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movdqu(operand, i.InputSimd128Register(index));
      }
      break;
    case kIA32BitcastFI:
      if (instr->InputAt(0)->IsFPStackSlot()) {
        __ mov(i.OutputRegister(), i.InputOperand(0));
      } else {
        __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
      }
      break;
    case kIA32BitcastIF:
      if (instr->InputAt(0)->IsRegister()) {
        __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kIA32Lea: {
      AddressingMode mode = AddressingModeField::decode(instr->opcode());
      // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
      // and addressing mode just happen to work out. The "addl"/"subl" forms
      // in these cases are faster based on measurements.
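      // For example, "lea eax, [eax + 4]" becomes "add eax, 4" and
      // "lea eax, [eax + eax]" becomes "shl eax, 1".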
      if (mode == kMode_MI) {
        __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
      } else if (i.InputRegister(0) == i.OutputRegister()) {
        if (mode == kMode_MRI) {
          int32_t constant_summand = i.InputInt32(1);
          if (constant_summand > 0) {
            __ add(i.OutputRegister(), Immediate(constant_summand));
          } else if (constant_summand < 0) {
            __ sub(i.OutputRegister(), Immediate(-constant_summand));
          }
        } else if (mode == kMode_MR1) {
          if (i.InputRegister(1) == i.OutputRegister()) {
            __ shl(i.OutputRegister(), 1);
          } else {
            __ add(i.OutputRegister(), i.InputRegister(1));
          }
        } else if (mode == kMode_M2) {
          __ shl(i.OutputRegister(), 1);
        } else if (mode == kMode_M4) {
          __ shl(i.OutputRegister(), 2);
        } else if (mode == kMode_M8) {
          __ shl(i.OutputRegister(), 3);
        } else {
          __ lea(i.OutputRegister(), i.MemoryOperand());
        }
      } else if (mode == kMode_MR1 &&
                 i.InputRegister(1) == i.OutputRegister()) {
        __ add(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ lea(i.OutputRegister(), i.MemoryOperand());
      }
      break;
    }
    case kIA32PushFloat32:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ sub(esp, Immediate(kFloatSize));
        __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
        frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
      } else if (HasImmediateInput(instr, 0)) {
        __ Move(kScratchDoubleReg, i.InputFloat32(0));
        __ sub(esp, Immediate(kFloatSize));
        __ movss(Operand(esp, 0), kScratchDoubleReg);
        frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
      } else {
        __ movss(kScratchDoubleReg, i.InputOperand(0));
        __ sub(esp, Immediate(kFloatSize));
        __ movss(Operand(esp, 0), kScratchDoubleReg);
        frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
      }
      break;
    case kIA32PushFloat64:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ sub(esp, Immediate(kDoubleSize));
        __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
        frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
      } else if (HasImmediateInput(instr, 0)) {
        __ Move(kScratchDoubleReg, i.InputDouble(0));
        __ sub(esp, Immediate(kDoubleSize));
        __ movsd(Operand(esp, 0), kScratchDoubleReg);
        frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
      } else {
        __ movsd(kScratchDoubleReg, i.InputOperand(0));
        __ sub(esp, Immediate(kDoubleSize));
        __ movsd(Operand(esp, 0), kScratchDoubleReg);
        frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
      }
      break;
    case kIA32PushSimd128:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ sub(esp, Immediate(kSimd128Size));
        __ movups(Operand(esp, 0), i.InputSimd128Register(0));
      } else {
        __ movups(kScratchDoubleReg, i.InputOperand(0));
        __ sub(esp, Immediate(kSimd128Size));
        __ movups(Operand(esp, 0), kScratchDoubleReg);
      }
      frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
      break;
    case kIA32Push:
      if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ push(operand);
        frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
      } else if (instr->InputAt(0)->IsFPRegister()) {
        __ sub(esp, Immediate(kFloatSize));
        __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
        frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
      } else if (HasImmediateInput(instr, 0)) {
        __ push(i.InputImmediate(0));
        frame_access_state()->IncreaseSPDelta(1);
      } else {
        __ push(i.InputOperand(0));
        frame_access_state()->IncreaseSPDelta(1);
      }
      break;
    case kIA32Poke: {
      int slot = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        __ mov(Operand(esp, slot * kPointerSize), i.InputImmediate(0));
      } else {
        __ mov(Operand(esp, slot * kPointerSize), i.InputRegister(0));
      }
      break;
    }
    case kIA32Peek: {
      int reverse_slot = i.InputInt32(0) + 1;
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
          __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
        }
      } else {
        __ mov(i.OutputRegister(), Operand(ebp, offset));
      }
      break;
    }
    case kSSEF32x4Splat: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputSimd128Register();
      __ shufps(dst, dst, 0x0);
      break;
    }
    case kAVXF32x4Splat: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src = i.InputFloatRegister(0);
      __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
      break;
    }
    case kSSEF32x4ExtractLane: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputFloatRegister();
      int8_t lane = i.InputInt8(1);
      if (lane != 0) {
        DCHECK_LT(lane, 4);
        __ shufps(dst, dst, lane);
      }
      break;
    }
    case kAVXF32x4ExtractLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputFloatRegister();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t lane = i.InputInt8(1);
      if (lane == 0) {
        if (dst != src) __ vmovaps(dst, src);
      } else {
        DCHECK_LT(lane, 4);
        __ vshufps(dst, src, src, lane);
      }
      break;
    }
    case kSSEF32x4ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
                  i.InputInt8(1) << 4);
      break;
    }
    case kAVXF32x4ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(2), i.InputInt8(1) << 4);
      break;
    }
    case kIA32F32x4SConvertI32x4: {
      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kSSEF32x4UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
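      // There is no unsigned int32 -> float conversion instruction, so split
      // each lane into its low 16 bits and the remaining high part, convert
      // both parts exactly, and sum them; only the final add can round.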
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
      __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
      __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
      __ cvtdq2ps(dst, dst);             // convert hi exactly
      __ addps(dst, dst);                // double hi, exactly
      __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kAVXF32x4UConvertI32x4: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
               kScratchDoubleReg);  // zeros
      __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
                  0x55);                           // get lo 16 bits
      __ vpsubd(dst, src, kScratchDoubleReg);      // get hi 16 bits
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ vpsrld(dst, dst, 1);    // divide by 2 to get in unsigned range
      __ vcvtdq2ps(dst, dst);    // convert hi exactly
      __ vaddps(dst, dst, dst);  // double hi, exactly
      __ vaddps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kSSEF32x4Abs: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psrld(kScratchDoubleReg, 1);
        __ andps(dst, kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ psrld(dst, 1);
        __ andps(dst, src);
      }
      break;
    }
    case kAVXF32x4Abs: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
      __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
                i.InputOperand(0));
      break;
    }
    case kSSEF32x4Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pslld(kScratchDoubleReg, 31);
        __ xorps(dst, kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ pslld(dst, 31);
        __ xorps(dst, src);
      }
      break;
    }
    case kAVXF32x4Neg: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
      __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
                i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipApprox: {
      __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipSqrtApprox: {
      __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kSSEF32x4Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ addps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4AddHoriz: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE3);
      __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEF32x4Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ subps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Mul: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Min: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ minps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Min: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Max: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ maxps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Max: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEF32x4Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(1));
      break;
    }
    case kSSEF32x4Lt: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Lt: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEF32x4Le: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Le: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I32x4Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshufd(dst, dst, 0x0);
      break;
    }
    case kIA32I32x4ExtractLane: {
      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
      break;
    }
    case kSSEI32x4ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kSSEI32x4SConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
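      // cvttps2dq produces 0x80000000 for NaN and for inputs outside the
      // int32 range; the fix-ups below turn NaN lanes into 0 and positive
      // overflow lanes into 0x7FFFFFFF.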
      // NaN -> 0
      __ movaps(kScratchDoubleReg, dst);
      __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
      __ pand(dst, kScratchDoubleReg);
      // Set top bit if >= 0 (but not -0.0!)
      __ pxor(kScratchDoubleReg, dst);
      // Convert
      __ cvttps2dq(dst, dst);
      // Set top bit if the lane was >= 0 but the conversion result is < 0
      __ pand(kScratchDoubleReg, dst);
      __ psrad(kScratchDoubleReg, 31);
      // Set positive overflow lanes to 0x7FFFFFFF
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4SConvertF32x4: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      // NaN -> 0
      __ vcmpeqps(kScratchDoubleReg, src, src);
      __ vpand(dst, src, kScratchDoubleReg);
      // Set top bit if >= 0 (but not -0.0!)
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
      // Convert
      __ vcvttps2dq(dst, dst);
      // Set top bit if the lane was >= 0 but the conversion result is < 0
      __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
      __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
      // Set positive overflow lanes to 0x7FFFFFFF
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kIA32I32x4SConvertI16x8Low: {
      __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4SConvertI16x8High: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovsxwd(dst, dst);
      break;
    }
    case kIA32I32x4Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
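      // When dst aliases src, negate in place: Psignd negates every lane of
      // dst whose corresponding mask lane is negative, so an all-ones mask
      // negates all lanes. Otherwise compute 0 - src.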
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignd(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubd(dst, src);
      }
      break;
    }
    case kSSEI32x4Shl: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4Shl: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4ShrS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ShrS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI32x4AddHoriz: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI32x4Mul: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MinS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MinS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MaxS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MaxS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI32x4Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI32x4Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kSSEI32x4GtS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4GtS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI32x4GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
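      // dst >= src exactly when min(dst, src) == src.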
      __ pminsd(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsd(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kSSEI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
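      // cvttps2dq only covers the signed int32 range. Lanes below 2^31
      // convert directly (tmp contributes 0); for lanes at or above 2^31,
      // cvttps2dq yields 0x80000000 and tmp holds the converted remainder
      // (src - 2^31), so the final add reassembles the unsigned value.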
      // NaN -> 0, negative -> 0
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ maxps(dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src - max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ movaps(tmp, dst);
      __ subps(tmp, kScratchDoubleReg);
      __ cmpleps(kScratchDoubleReg, tmp);
      __ cvttps2dq(tmp, tmp);
      __ pxor(tmp, kScratchDoubleReg);
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ pmaxsd(tmp, kScratchDoubleReg);
      // Convert. Overflow lanes above max_signed will be 0x80000000.
      __ cvttps2dq(dst, dst);
      // Add (src - max_signed) for overflow lanes.
      __ paddd(dst, tmp);
      break;
    }
    case kAVXI32x4UConvertF32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // NaN -> 0, negative -> 0
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vmaxps(dst, dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src - max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ vsubps(tmp, dst, kScratchDoubleReg);
      __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
      __ vcvttps2dq(tmp, tmp);
      __ vpxor(tmp, tmp, kScratchDoubleReg);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
      // Convert. Overflow lanes above max_signed will be 0x80000000.
      __ vcvttps2dq(dst, dst);
      // Add (src - max_signed) for overflow lanes.
      __ vpaddd(dst, dst, tmp);
      break;
    }
    case kIA32I32x4UConvertI16x8Low: {
      __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4UConvertI16x8High: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovzxwd(dst, dst);
      break;
    }
    case kSSEI32x4ShrU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ShrU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4MinU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MinU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MaxU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MaxU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
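      // Unsigned dst > src exactly when max(dst, src) != src; compute
      // max(dst, src) == src and invert the result.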
      __ pmaxud(dst, src);
      __ pcmpeqd(dst, src);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI32x4GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminud(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshuflw(dst, dst, 0x0);
      __ Pshufd(dst, dst, 0x0);
      break;
    }
    case kIA32I16x8ExtractLane: {
      Register dst = i.OutputRegister();
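      // Pextrw zero-extends the lane into dst, so sign-extend it to get the
      // signed lane value.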
      __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
      __ movsx_w(dst, dst);
      break;
    }
    case kSSEI16x8ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kIA32I16x8SConvertI8x16Low: {
      __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8SConvertI8x16High: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovsxbw(dst, dst);
      break;
    }
    case kIA32I16x8Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignw(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubw(dst, src);
      }
      break;
    }
    case kSSEI16x8Shl: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8Shl: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8ShrS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ShrS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8SConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ packssdw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8SConvertI32x4: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(1));
      break;
    }
    case kSSEI16x8Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI16x8AddSaturateS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8AddSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8AddHoriz: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI16x8SubSaturateS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8SubSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Mul: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8MinS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MinS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8MaxS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MaxS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI16x8Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kSSEI16x8GtS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8GtS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8UConvertI8x16Low: {
      __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8UConvertI8x16High: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovzxbw(dst, dst);
      break;
    }
    case kSSEI16x8ShrU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ShrU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
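      // packusdw saturates based on the signed value of each lane, so lanes
      // with the top bit set would narrow to 0. Clamping them (as unsigned
      // values) to 0x7FFFFFFF first makes them saturate to 0xFFFF instead.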
      // Change negative lanes to 0x7FFFFFFF
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);
      __ pminud(dst, kScratchDoubleReg);
      __ pminud(kScratchDoubleReg, i.InputOperand(1));
      __ packusdw(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI16x8UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFFFFFF
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
      __ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
      __ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
      __ vpackusdw(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI16x8AddSaturateU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8AddSaturateU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8SubSaturateU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8SubSaturateU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8MinU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MinU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8MaxU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MaxU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxuw(dst, src);
      __ pcmpeqw(dst, src);
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI16x8GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(dst, kScratchDoubleReg, src2);
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI16x8GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminuw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminuw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I8x16Splat: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ Pshufb(dst, kScratchDoubleReg);
      break;
    }
    case kIA32I8x16ExtractLane: {
      Register dst = i.OutputRegister();
      __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
      __ movsx_b(dst, dst);
      break;
    }
    case kSSEI8x16ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kAVXI8x16ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kSSEI8x16SConvertI16x8: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16SConvertI16x8: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(1));
      break;
    }
    case kIA32I8x16Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignb(dst, kScratchDoubleReg);
      } else {
        __ Pxor(dst, dst);
        __ Psubb(dst, src);
      }
      break;
    }
    case kSSEI8x16Shl: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      int8_t shift = i.InputInt8(1) & 0x7;
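      // There is no byte-granular shift instruction, so the shift is
      // synthesized either by repeated additions (each paddb doubles every
      // byte) or by a word shift with the bits that would cross byte
      // boundaries masked off.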
      if (shift < 4) {
        // For small shifts, doubling is faster.
        for (int i = 0; i < shift; ++i) {
          __ paddb(dst, dst);
        }
      } else {
        // Mask off the unwanted bits before word-shifting.
        __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
        __ psrlw(kScratchDoubleReg, 8 + shift);
        __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
        __ pand(dst, kScratchDoubleReg);
        __ psllw(dst, shift);
      }
      break;
    }
    case kAVXI8x16Shl: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      if (shift < 4) {
        // For small shifts, doubling is faster.
        for (int i = 0; i < shift; ++i) {
          __ vpaddb(dst, src, src);
          src = dst;
        }
      } else {
        // Mask off the unwanted bits before word-shifting.
        __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
        __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift);
        __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
        __ vpand(dst, src, kScratchDoubleReg);
        __ vpsllw(dst, dst, shift);
      }
      break;
    }
    case kIA32I8x16ShrS: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do arithmetic shifts, and repack.
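      // After the unpack, each byte sits in the high half of a word, so an
      // arithmetic word shift by 8 + shift both sign-extends the byte and
      // applies the shift.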
      __ Punpckhbw(kScratchDoubleReg, src);
      __ Punpcklbw(dst, src);
      __ Psraw(kScratchDoubleReg, 8 + shift);
      __ Psraw(dst, 8 + shift);
      __ Packsswb(dst, kScratchDoubleReg);
      break;
    }
    case kSSEI8x16Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI8x16AddSaturateS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16AddSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI8x16SubSaturateS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16SubSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16Mul: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      XMMRegister right = i.InputSimd128Register(1);
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));

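      // There is no byte-wise multiply instruction, so multiply the high and
      // low bytes of each word separately as 16-bit lanes and recombine the
      // low bytes of the two products.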
      // I16x8 view of I8x16
      // left = AAaa AAaa ... AAaa AAaa
      // right= BBbb BBbb ... BBbb BBbb

      // t = 00AA 00AA ... 00AA 00AA
      // s = 00BB 00BB ... 00BB 00BB
      __ movaps(tmp, dst);
      __ movaps(kScratchDoubleReg, right);
      __ psrlw(tmp, 8);
      __ psrlw(kScratchDoubleReg, 8);
      // dst = left * 256
      __ psllw(dst, 8);

      // t = I16x8Mul(t, s)
      //    => __PP __PP ... __PP __PP
      __ pmullw(tmp, kScratchDoubleReg);
      // dst = I16x8Mul(left * 256, right)
      //    => pp__ pp__ ... pp__ pp__
      __ pmullw(dst, right);

      // t = I16x8Shl(t, 8)
      //    => PP00 PP00 ... PP00 PP00
      __ psllw(tmp, 8);

      // dst = I16x8Shr(dst, 8)
      //    => 00pp 00pp ... 00pp 00pp
      __ psrlw(dst, 8);

      // dst = I16x8Or(dst, t)
      //    => PPpp PPpp ... PPpp PPpp
      __ por(dst, tmp);
      break;
    }
    case kAVXI8x16Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister left = i.InputSimd128Register(0);
      XMMRegister right = i.InputSimd128Register(1);
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));

      // I16x8 view of I8x16
      // left = AAaa AAaa ... AAaa AAaa
      // right= BBbb BBbb ... BBbb BBbb

      // t = 00AA 00AA ... 00AA 00AA
      // s = 00BB 00BB ... 00BB 00BB
      __ vpsrlw(tmp, left, 8);
      __ vpsrlw(kScratchDoubleReg, right, 8);

      // t = I16x8Mul(t, s)
      //    => __PP __PP ... __PP __PP
      __ vpmullw(tmp, tmp, kScratchDoubleReg);

      // s = left * 256
      __ vpsllw(kScratchDoubleReg, left, 8);

      // dst = I16x8Mul(left * 256, right)
      //    => pp__ pp__ ... pp__ pp__
      __ vpmullw(dst, kScratchDoubleReg, right);

      // dst = I16x8Shr(dst, 8)
      //    => 00pp 00pp ... 00pp 00pp
      __ vpsrlw(dst, dst, 8);

      // t = I16x8Shl(t, 8)
      //    => PP00 PP00 ... PP00 PP00
      __ vpsllw(tmp, tmp, 8);

      // dst = I16x8Or(dst, t)
      //    => PPpp PPpp ... PPpp PPpp
      __ vpor(dst, dst, tmp);
      break;
    }
    case kSSEI8x16MinS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16MinS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16MaxS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16MaxS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI8x16Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI8x16Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kSSEI8x16GtS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16GtS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI8x16GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsb(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsb(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kSSEI8x16UConvertI16x8: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFF
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ psrlw(kScratchDoubleReg, 1);
      __ pminuw(dst, kScratchDoubleReg);
      __ pminuw(kScratchDoubleReg, i.InputOperand(1));
      __ packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI8x16UConvertI16x8: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFF
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
      __ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
      __ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
      __ vpackuswb(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI8x16AddSaturateU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16AddSaturateU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI8x16SubSaturateU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16SubSaturateU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I8x16ShrU: {
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t shift = i.InputInt8(1) & 0x7;
      // Unpack the bytes into words, do logical shifts, and repack.
      __ Punpckhbw(kScratchDoubleReg, src);
      __ Punpcklbw(dst, src);
      __ Psrlw(kScratchDoubleReg, 8 + shift);
      __ Psrlw(dst, 8 + shift);
      __ Packuswb(dst, kScratchDoubleReg);
      break;
    }
    case kSSEI8x16MinU: {
      XMMRegister dst = i.OutputSimd128Register();
      DCHECK_EQ(dst, i.InputSimd128Register(0));
      __ pminub(dst, i.InputOperand(1));
      break;
    }
    case kAVXI8x16MinU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16MaxU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI8x16MaxU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI8x16GtU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxub(dst, src);
      __ pcmpeqb(dst, src);
      __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI8x16GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(dst, kScratchDoubleReg, src2);
      __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI8x16GeU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminub(dst, src);
      __ pcmpeqb(dst, src);
      break;
    }
    case kAVXI8x16GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminub(kScratchDoubleReg, src1, src2);
      __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32S128Zero: {
      XMMRegister dst = i.OutputSimd128Register();
      __ Pxor(dst, dst);
      break;
    }
    case kSSES128Not: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pxor(dst, kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ pxor(dst, src);
      }
      break;
    }
    case kAVXS128Not: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
      break;
    }
    case kSSES128And: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pand(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXS128And: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kSSES128Or: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ por(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXS128Or: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
              i.InputOperand(1));
      break;
    }
    case kSSES128Xor: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXS128Xor: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputOperand(1));
      break;
    }
    case kSSES128Select: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      // Mask used here is stored in dst.
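      // dst = (mask & (src1 ^ src2)) ^ src2: bits from src1 where the mask
      // is set, bits from src2 where it is clear.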
3226 XMMRegister dst = i.OutputSimd128Register();
3227 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3228 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3229 __ andps(dst, kScratchDoubleReg);
3230 __ xorps(dst, i.InputSimd128Register(2));
3231 break;
3232 }
3233 case kAVXS128Select: {
3234 CpuFeatureScope avx_scope(tasm(), AVX);
3235 XMMRegister dst = i.OutputSimd128Register();
3236 __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
3237 i.InputOperand(1));
3238 __ vandps(dst, kScratchDoubleReg, i.InputOperand(0));
3239 __ vxorps(dst, dst, i.InputSimd128Register(2));
3240 break;
3241 }
3242 case kIA32S8x16Shuffle: {
3243 XMMRegister dst = i.OutputSimd128Register();
3244 Operand src0 = i.InputOperand(0);
3245 Register tmp = i.TempRegister(0);
3246 // Prepare a 16-byte-aligned buffer on the stack for the shuffle control mask.
3247 __ mov(tmp, esp);
3248 __ and_(esp, -16);
3249 if (instr->InputCount() == 5) { // only one input operand
3250 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3251 for (int j = 4; j > 0; j--) {
3252 uint32_t mask = i.InputUint32(j);
3253 __ push(Immediate(mask));
3254 }
3255 __ Pshufb(dst, Operand(esp, 0));
3256 } else { // two input operands
3257 DCHECK_EQ(6, instr->InputCount());
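// Shuffle in two passes: bytes taken from src0 go through the scratch
// register (lane indices >= 16 become 0x80, which pshufb zeroes), bytes
// taken from src1 go through dst, and por merges the two halves.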
3258 __ movups(kScratchDoubleReg, src0);
3259 for (int j = 5; j > 1; j--) {
3260 uint32_t lanes = i.InputUint32(j);
3261 uint32_t mask = 0;
3262 for (int k = 0; k < 32; k += 8) {
3263 uint8_t lane = lanes >> k;
3264 mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3265 }
3266 __ push(Immediate(mask));
3267 }
3268 __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3269 Operand src1 = i.InputOperand(1);
3270 if (!src1.is_reg(dst)) __ movups(dst, src1);
3271 for (int j = 5; j > 1; j--) {
3272 uint32_t lanes = i.InputUint32(j);
3273 uint32_t mask = 0;
3274 for (int k = 0; k < 32; k += 8) {
3275 uint8_t lane = lanes >> k;
3276 mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3277 }
3278 __ push(Immediate(mask));
3279 }
3280 __ Pshufb(dst, Operand(esp, 0));
3281 __ por(dst, kScratchDoubleReg);
3282 }
3283 __ mov(esp, tmp);
3284 break;
3285 }
3286 case kIA32S32x4Swizzle: {
3287 DCHECK_EQ(2, instr->InputCount());
3288 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
3289 break;
3290 }
3291 case kIA32S32x4Shuffle: {
3292 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3293 int8_t shuffle = i.InputInt8(2);
3294 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3295 __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3296 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3297 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3298 break;
3299 }
3300 case kIA32S16x8Blend:
3301 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3302 break;
3303 case kIA32S16x8HalfShuffle1: {
3304 XMMRegister dst = i.OutputSimd128Register();
3305 __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
3306 __ Pshufhw(dst, dst, i.InputInt8(2));
3307 break;
3308 }
3309 case kIA32S16x8HalfShuffle2: {
3310 XMMRegister dst = i.OutputSimd128Register();
3311 __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
3312 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3313 __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
3314 __ Pshufhw(dst, dst, i.InputInt8(3));
3315 __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3316 break;
3317 }
3318 case kIA32S8x16Alignr:
3319 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3320 break;
3321 case kIA32S16x8Dup: {
3322 XMMRegister dst = i.OutputSimd128Register();
3323 Operand src = i.InputOperand(0);
3324 int8_t lane = i.InputInt8(1) & 0x7;
3325 int8_t lane4 = lane & 0x3;
3326 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
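// half_dup replicates the selected word across a 64-bit half; the Pshufd
// then broadcasts one dword of that half to the whole register.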
3327 if (lane < 4) {
3328 __ Pshuflw(dst, src, half_dup);
3329 __ Pshufd(dst, dst, 0);
3330 } else {
3331 __ Pshufhw(dst, src, half_dup);
3332 __ Pshufd(dst, dst, 0xaa);
3333 }
3334 break;
3335 }
3336 case kIA32S8x16Dup: {
3337 XMMRegister dst = i.OutputSimd128Register();
3338 XMMRegister src = i.InputSimd128Register(0);
3339 int8_t lane = i.InputInt8(1) & 0xf;
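// Duplicate the byte into a full word by unpacking the source with
// itself, then reuse the word-broadcast sequence below.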
3340 if (CpuFeatures::IsSupported(AVX)) {
3341 CpuFeatureScope avx_scope(tasm(), AVX);
3342 if (lane < 8) {
3343 __ vpunpcklbw(dst, src, src);
3344 } else {
3345 __ vpunpckhbw(dst, src, src);
3346 }
3347 } else {
3348 DCHECK_EQ(dst, src);
3349 if (lane < 8) {
3350 __ punpcklbw(dst, dst);
3351 } else {
3352 __ punpckhbw(dst, dst);
3353 }
3354 }
3355 lane &= 0x7;
3356 int8_t lane4 = lane & 0x3;
3357 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3358 if (lane < 4) {
3359 __ Pshuflw(dst, dst, half_dup);
3360 __ Pshufd(dst, dst, 0);
3361 } else {
3362 __ Pshufhw(dst, dst, half_dup);
3363 __ Pshufd(dst, dst, 0xaa);
3364 }
3365 break;
3366 }
3367 case kIA32S64x2UnpackHigh:
3368 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3369 break;
3370 case kIA32S32x4UnpackHigh:
3371 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3372 break;
3373 case kIA32S16x8UnpackHigh:
3374 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3375 break;
3376 case kIA32S8x16UnpackHigh:
3377 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3378 break;
3379 case kIA32S64x2UnpackLow:
3380 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3381 break;
3382 case kIA32S32x4UnpackLow:
3383 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3384 break;
3385 case kIA32S16x8UnpackLow:
3386 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3387 break;
3388 case kIA32S8x16UnpackLow:
3389 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3390 break;
3391 case kSSES16x8UnzipHigh: {
3392 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3393 XMMRegister dst = i.OutputSimd128Register();
3394 XMMRegister src2 = dst;
3395 DCHECK_EQ(dst, i.InputSimd128Register(0));
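// Shift each dword right by 16 so only the odd words survive; packusdw
// then packs them from both inputs into dst.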
3396 if (instr->InputCount() == 2) {
3397 __ movups(kScratchDoubleReg, i.InputOperand(1));
3398 __ psrld(kScratchDoubleReg, 16);
3399 src2 = kScratchDoubleReg;
3400 }
3401 __ psrld(dst, 16);
3402 __ packusdw(dst, src2);
3403 break;
3404 }
3405 case kAVXS16x8UnzipHigh: {
3406 CpuFeatureScope avx_scope(tasm(), AVX);
3407 XMMRegister dst = i.OutputSimd128Register();
3408 XMMRegister src2 = dst;
3409 if (instr->InputCount() == 2) {
3410 __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3411 src2 = kScratchDoubleReg;
3412 }
3413 __ vpsrld(dst, i.InputSimd128Register(0), 16);
3414 __ vpackusdw(dst, dst, src2);
3415 break;
3416 }
3417 case kSSES16x8UnzipLow: {
3418 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3419 XMMRegister dst = i.OutputSimd128Register();
3420 XMMRegister src2 = dst;
3421 DCHECK_EQ(dst, i.InputSimd128Register(0));
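// Clear the odd words of each input by blending against zero; packusdw
// then packs the zero-extended even words of both inputs into dst.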
3422 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3423 if (instr->InputCount() == 2) {
3424 __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3425 src2 = kScratchDoubleReg;
3426 }
3427 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3428 __ packusdw(dst, src2);
3429 break;
3430 }
3431 case kAVXS16x8UnzipLow: {
3432 CpuFeatureScope avx_scope(tasm(), AVX);
3433 XMMRegister dst = i.OutputSimd128Register();
3434 XMMRegister src2 = dst;
3435 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3436 if (instr->InputCount() == 2) {
3437 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3438 0x55);
3439 src2 = kScratchDoubleReg;
3440 }
3441 __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3442 __ vpackusdw(dst, dst, src2);
3443 break;
3444 }
3445 case kSSES8x16UnzipHigh: {
3446 XMMRegister dst = i.OutputSimd128Register();
3447 XMMRegister src2 = dst;
3448 DCHECK_EQ(dst, i.InputSimd128Register(0));
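// Shift each word right by 8 so only the odd bytes survive; packuswb then
// packs them from both inputs into dst.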
3449 if (instr->InputCount() == 2) {
3450 __ movups(kScratchDoubleReg, i.InputOperand(1));
3451 __ psrlw(kScratchDoubleReg, 8);
3452 src2 = kScratchDoubleReg;
3453 }
3454 __ psrlw(dst, 8);
3455 __ packuswb(dst, src2);
3456 break;
3457 }
3458 case kAVXS8x16UnzipHigh: {
3459 CpuFeatureScope avx_scope(tasm(), AVX);
3460 XMMRegister dst = i.OutputSimd128Register();
3461 XMMRegister src2 = dst;
3462 if (instr->InputCount() == 2) {
3463 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3464 src2 = kScratchDoubleReg;
3465 }
3466 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3467 __ vpackuswb(dst, dst, src2);
3468 break;
3469 }
3470 case kSSES8x16UnzipLow: {
3471 XMMRegister dst = i.OutputSimd128Register();
3472 XMMRegister src2 = dst;
3473 DCHECK_EQ(dst, i.InputSimd128Register(0));
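// Shifting each word left and then right by 8 zero-extends the even
// bytes, which packuswb then packs from both inputs into dst.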
3474 if (instr->InputCount() == 2) {
3475 __ movups(kScratchDoubleReg, i.InputOperand(1));
3476 __ psllw(kScratchDoubleReg, 8);
3477 __ psrlw(kScratchDoubleReg, 8);
3478 src2 = kScratchDoubleReg;
3479 }
3480 __ psllw(dst, 8);
3481 __ psrlw(dst, 8);
3482 __ packuswb(dst, src2);
3483 break;
3484 }
3485 case kAVXS8x16UnzipLow: {
3486 CpuFeatureScope avx_scope(tasm(), AVX);
3487 XMMRegister dst = i.OutputSimd128Register();
3488 XMMRegister src2 = dst;
3489 if (instr->InputCount() == 2) {
3490 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3491 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3492 src2 = kScratchDoubleReg;
3493 }
3494 __ vpsllw(dst, i.InputSimd128Register(0), 8);
3495 __ vpsrlw(dst, dst, 8);
3496 __ vpackuswb(dst, dst, src2);
3497 break;
3498 }
3499 case kSSES8x16TransposeLow: {
3500 XMMRegister dst = i.OutputSimd128Register();
3501 DCHECK_EQ(dst, i.InputSimd128Register(0));
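// Interleave the even bytes of the two inputs: the even bytes of src0 end
// up in the low byte of each word and the even bytes of src1 (src0 again
// in the unary case) in the high byte.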
3502 __ psllw(dst, 8);
3503 if (instr->InputCount() == 1) {
3504 __ movups(kScratchDoubleReg, dst);
3505 } else {
3506 DCHECK_EQ(2, instr->InputCount());
3507 __ movups(kScratchDoubleReg, i.InputOperand(1));
3508 __ psllw(kScratchDoubleReg, 8);
3509 }
3510 __ psrlw(dst, 8);
3511 __ por(dst, kScratchDoubleReg);
3512 break;
3513 }
3514 case kAVXS8x16TransposeLow: {
3515 CpuFeatureScope avx_scope(tasm(), AVX);
3516 XMMRegister dst = i.OutputSimd128Register();
3517 if (instr->InputCount() == 1) {
3518 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
3519 __ vpsrlw(dst, kScratchDoubleReg, 8);
3520 } else {
3521 DCHECK_EQ(2, instr->InputCount());
3522 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3523 __ vpsllw(dst, i.InputSimd128Register(0), 8);
3524 __ vpsrlw(dst, dst, 8);
3525 }
3526 __ vpor(dst, dst, kScratchDoubleReg);
3527 break;
3528 }
3529 case kSSES8x16TransposeHigh: {
3530 XMMRegister dst = i.OutputSimd128Register();
3531 DCHECK_EQ(dst, i.InputSimd128Register(0));
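// As TransposeLow, but interleaves the odd bytes of the two inputs
// instead.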
3532 __ psrlw(dst, 8);
3533 if (instr->InputCount() == 1) {
3534 __ movups(kScratchDoubleReg, dst);
3535 } else {
3536 DCHECK_EQ(2, instr->InputCount());
3537 __ movups(kScratchDoubleReg, i.InputOperand(1));
3538 __ psrlw(kScratchDoubleReg, 8);
3539 }
3540 __ psllw(kScratchDoubleReg, 8);
3541 __ por(dst, kScratchDoubleReg);
3542 break;
3543 }
3544 case kAVXS8x16TransposeHigh: {
3545 CpuFeatureScope avx_scope(tasm(), AVX);
3546 XMMRegister dst = i.OutputSimd128Register();
3547 if (instr->InputCount() == 1) {
3548 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3549 __ vpsllw(kScratchDoubleReg, dst, 8);
3550 } else {
3551 DCHECK_EQ(2, instr->InputCount());
3552 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3553 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3554 __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3555 }
3556 __ vpor(dst, dst, kScratchDoubleReg);
3557 break;
3558 }
3559 case kSSES8x8Reverse:
3560 case kSSES8x4Reverse:
3561 case kSSES8x2Reverse: {
3562 DCHECK_EQ(1, instr->InputCount());
3563 XMMRegister dst = i.OutputSimd128Register();
3564 DCHECK_EQ(dst, i.InputSimd128Register(0));
3565 if (arch_opcode != kSSES8x2Reverse) {
3566 // First shuffle words into position.
3567 int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
3568 __ pshuflw(dst, dst, shuffle_mask);
3569 __ pshufhw(dst, dst, shuffle_mask);
3570 }
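// Swap the two bytes within each 16-bit lane.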
3571 __ movaps(kScratchDoubleReg, dst);
3572 __ psrlw(kScratchDoubleReg, 8);
3573 __ psllw(dst, 8);
3574 __ por(dst, kScratchDoubleReg);
3575 break;
3576 }
3577 case kAVXS8x2Reverse:
3578 case kAVXS8x4Reverse:
3579 case kAVXS8x8Reverse: {
3580 DCHECK_EQ(1, instr->InputCount());
3581 CpuFeatureScope avx_scope(tasm(), AVX);
3582 XMMRegister dst = i.OutputSimd128Register();
3583 XMMRegister src = dst;
3584 if (arch_opcode != kAVXS8x2Reverse) {
3585 // First shuffle words into position.
3586 int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3587 __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
3588 __ vpshufhw(dst, dst, shuffle_mask);
3589 } else {
3590 src = i.InputSimd128Register(0);
3591 }
3592 // Reverse the two bytes within each 16-bit lane.
3593 __ vpsrlw(kScratchDoubleReg, src, 8);
3594 __ vpsllw(dst, src, 8);
3595 __ vpor(dst, dst, kScratchDoubleReg);
3596 break;
3597 }
3598 case kIA32S1x4AnyTrue:
3599 case kIA32S1x8AnyTrue:
3600 case kIA32S1x16AnyTrue: {
3601 Register dst = i.OutputRegister();
3602 XMMRegister src = i.InputSimd128Register(0);
3603 Register tmp = i.TempRegister(0);
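// ptest sets ZF iff src is all zeros; the cmov then picks the zero in
// tmp, otherwise dst keeps its preloaded all-ones value.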
3604 __ xor_(tmp, tmp);
3605 __ mov(dst, Immediate(-1));
3606 __ Ptest(src, src);
3607 __ cmov(zero, dst, tmp);
3608 break;
3609 }
3610 case kIA32S1x4AllTrue:
3611 case kIA32S1x8AllTrue:
3612 case kIA32S1x16AllTrue: {
3613 Register dst = i.OutputRegister();
3614 Operand src = i.InputOperand(0);
3615 Register tmp = i.TempRegister(0);
3616 __ mov(tmp, Immediate(-1));
3617 __ xor_(dst, dst);
3618 // Compare all src lanes to false.
3619 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3620 if (arch_opcode == kIA32S1x4AllTrue) {
3621 __ Pcmpeqd(kScratchDoubleReg, src);
3622 } else if (arch_opcode == kIA32S1x8AllTrue) {
3623 __ Pcmpeqw(kScratchDoubleReg, src);
3624 } else {
3625 __ Pcmpeqb(kScratchDoubleReg, src);
3626 }
3627 // If kScratchDoubleReg is all zero, no src lane was false.
3628 __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
3629 __ cmov(zero, dst, tmp);
3630 break;
3631 }
3632 case kIA32StackCheck: {
3633 ExternalReference const stack_limit =
3634 ExternalReference::address_of_stack_limit(__ isolate());
3635 __ VerifyRootRegister();
3636 __ cmp(esp, tasm()->StaticVariable(stack_limit));
3637 break;
3638 }
3639 case kIA32Word32AtomicPairLoad: {
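// A naturally aligned 8-byte movq is performed as a single atomic access,
// so load through an XMM register and extract the two 32-bit halves.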
3640 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
3641 __ movq(tmp, i.MemoryOperand());
3642 __ Pextrd(i.OutputRegister(0), tmp, 0);
3643 __ Pextrd(i.OutputRegister(1), tmp, 1);
3644 break;
3645 }
3646 case kIA32Word32AtomicPairStore: {
// A plain lock cmpxchg8b only stores if edx:eax still matches the memory
// value, so retry until it succeeds. On failure cmpxchg8b itself reloads
// edx:eax (the temps) with the current value; the value to store is
// expected in ecx:ebx, as fixed by the instruction selector.
Label store;
3647 __ mov(i.TempRegister(0), i.MemoryOperand(2));
3648 __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
__ bind(&store);
3649 __ lock();
3650 __ cmpxchg8b(i.MemoryOperand(2));
__ j(not_equal, &store);
3651 break;
3652 }
3653 case kWord32AtomicExchangeInt8: {
3654 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3655 __ movsx_b(i.InputRegister(0), i.InputRegister(0));
3656 break;
3657 }
3658 case kWord32AtomicExchangeUint8: {
3659 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3660 __ movzx_b(i.InputRegister(0), i.InputRegister(0));
3661 break;
3662 }
3663 case kWord32AtomicExchangeInt16: {
3664 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3665 __ movsx_w(i.InputRegister(0), i.InputRegister(0));
3666 break;
3667 }
3668 case kWord32AtomicExchangeUint16: {
3669 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3670 __ movzx_w(i.InputRegister(0), i.InputRegister(0));
3671 break;
3672 }
3673 case kWord32AtomicExchangeWord32: {
3674 __ xchg(i.InputRegister(0), i.MemoryOperand(1));
3675 break;
3676 }
3677 // For the narrow Word64 operations below, i.OutputRegister(1) contains
3678 // the high-order 32 bits of the 64-bit result. As the exchanged data
3679 // fits in one register, i.OutputRegister(1) needs to be cleared so that
3680 // the correct return value is propagated back.
3681 case kIA32Word64AtomicNarrowExchangeUint8: {
3682 __ xchg_b(i.OutputRegister(0), i.MemoryOperand(1));
3683 __ movzx_b(i.OutputRegister(0), i.OutputRegister(0));
3684 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3685 break;
3686 }
3687 case kIA32Word64AtomicNarrowExchangeUint16: {
3688 __ xchg_w(i.OutputRegister(0), i.MemoryOperand(1));
3689 __ movzx_w(i.OutputRegister(0), i.OutputRegister(0));
3690 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3691 break;
3692 }
3693 case kIA32Word64AtomicNarrowExchangeUint32: {
3694 __ xchg(i.OutputRegister(0), i.MemoryOperand(1));
3695 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3696 break;
3697 }
3698 case kIA32Word32AtomicPairExchange: {
// Load the old value into edx:eax (the outputs), then retry the
// cmpxchg8b until it succeeds in storing the new value from ecx:ebx.
// On failure cmpxchg8b itself refreshes edx:eax with the current value.
Label exchange;
3699 __ mov(i.OutputRegister(0), i.MemoryOperand(2));
3700 __ mov(i.OutputRegister(1), i.NextMemoryOperand(2));
__ bind(&exchange);
3701 __ lock();
3702 __ cmpxchg8b(i.MemoryOperand(2));
__ j(not_equal, &exchange);
3703 break;
3704 }
3705 case kWord32AtomicCompareExchangeInt8: {
3706 __ lock();
3707 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3708 __ movsx_b(eax, eax);
3709 break;
3710 }
3711 case kWord32AtomicCompareExchangeUint8: {
3712 __ lock();
3713 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3714 __ movzx_b(eax, eax);
3715 break;
3716 }
3717 case kWord32AtomicCompareExchangeInt16: {
3718 __ lock();
3719 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3720 __ movsx_w(eax, eax);
3721 break;
3722 }
3723 case kWord32AtomicCompareExchangeUint16: {
3724 __ lock();
3725 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3726 __ movzx_w(eax, eax);
3727 break;
3728 }
3729 case kWord32AtomicCompareExchangeWord32: {
3730 __ lock();
3731 __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3732 break;
3733 }
3734 case kIA32Word64AtomicNarrowCompareExchangeUint8: {
3735 __ lock();
3736 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3737 __ movzx_b(i.OutputRegister(0), i.OutputRegister(0));
3738 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3739 break;
3740 }
3741 case kIA32Word64AtomicNarrowCompareExchangeUint16: {
3742 __ lock();
3743 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3744 __ movzx_w(i.OutputRegister(0), i.OutputRegister(0));
3745 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3746 break;
3747 }
3748 case kIA32Word64AtomicNarrowCompareExchangeUint32: {
3749 __ lock();
3750 __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3751 __ xor_(i.OutputRegister(1), i.OutputRegister(1));
3752 break;
3753 }
3754 case kIA32Word32AtomicPairCompareExchange: {
3755 __ lock();
3756 __ cmpxchg8b(i.MemoryOperand(4));
3757 break;
3758 }
3759 #define ATOMIC_BINOP_CASE(op, inst) \
3760 case kWord32Atomic##op##Int8: { \
3761 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3762 __ movsx_b(eax, eax); \
3763 break; \
3764 } \
3765 case kIA32Word64AtomicNarrow##op##Uint8: { \
3766 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3767 __ movzx_b(i.OutputRegister(0), i.OutputRegister(0)); \
3768 __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \
3769 break; \
3770 } \
3771 case kWord32Atomic##op##Uint8: { \
3772 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3773 __ movzx_b(eax, eax); \
3774 break; \
3775 } \
3776 case kWord32Atomic##op##Int16: { \
3777 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3778 __ movsx_w(eax, eax); \
3779 break; \
3780 } \
3781 case kIA32Word64AtomicNarrow##op##Uint16: { \
3782 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3783 __ movzx_w(i.OutputRegister(0), i.OutputRegister(0)); \
3784 __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \
3785 break; \
3786 } \
3787 case kWord32Atomic##op##Uint16: { \
3788 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3789 __ movzx_w(eax, eax); \
3790 break; \
3791 } \
3792 case kIA32Word64AtomicNarrow##op##Uint32: { \
3793 ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
3794 __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \
3795 break; \
3796 } \
3797 case kWord32Atomic##op##Word32: { \
3798 ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
3799 break; \
3800 }
3801 ATOMIC_BINOP_CASE(Add, add)
3802 ATOMIC_BINOP_CASE(Sub, sub)
3803 ATOMIC_BINOP_CASE(And, and_)
3804 ATOMIC_BINOP_CASE(Or, or_)
3805 ATOMIC_BINOP_CASE(Xor, xor_)
3806 #undef ATOMIC_BINOP_CASE
3807 #define ATOMIC_BINOP_CASE(op, instr1, instr2) \
3808 case kIA32Word32AtomicPair##op: { \
3809 ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
3810 break; \
3811 }
3812 ATOMIC_BINOP_CASE(Add, add, adc)
3813 ATOMIC_BINOP_CASE(And, and_, and_)
3814 ATOMIC_BINOP_CASE(Or, or_, or_)
3815 ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3816 #undef ATOMIC_BINOP_CASE
3817 case kIA32Word32AtomicPairSub: {
3818 Label binop;
3819 __ bind(&binop);
3820 // Move memory operand into edx:eax
3821 __ mov(i.OutputRegister(0), i.MemoryOperand(2));
3822 __ mov(i.OutputRegister(1), i.NextMemoryOperand(2));
3823 // Save input registers temporarily on the stack.
3824 __ push(i.InputRegister(0));
3825 __ push(i.InputRegister(1));
3826 // Negate the 64-bit input in place (neg/adc/neg propagates the borrow).
3827 __ neg(i.InputRegister(0));
3828 __ adc(i.InputRegister(1), 0);
3829 __ neg(i.InputRegister(1));
3830 // Add the old memory value (in the output registers) to the negated input.
3831 __ add(i.InputRegister(0), i.OutputRegister(0));
3832 __ adc(i.InputRegister(1), i.OutputRegister(1));
3833 __ lock();
3834 __ cmpxchg8b(i.MemoryOperand(2));
3835 // Restore input registers
3836 __ pop(i.InputRegister(1));
3837 __ pop(i.InputRegister(0));
3838 __ j(not_equal, &binop);
3839 break;
3840 }
3841 case kWord32AtomicLoadInt8:
3842 case kWord32AtomicLoadUint8:
3843 case kWord32AtomicLoadInt16:
3844 case kWord32AtomicLoadUint16:
3845 case kWord32AtomicLoadWord32:
3846 case kWord32AtomicStoreWord8:
3847 case kWord32AtomicStoreWord16:
3848 case kWord32AtomicStoreWord32:
3849 UNREACHABLE(); // Won't be generated by instruction selector.
3850 break;
3851 }
3852 return kSuccess;
3853 } // NOLINT(readability/fn_size)
3854
3855 static Condition FlagsConditionToCondition(FlagsCondition condition) {
3856 switch (condition) {
3857 case kUnorderedEqual:
3858 case kEqual:
3859 return equal;
3861 case kUnorderedNotEqual:
3862 case kNotEqual:
3863 return not_equal;
3865 case kSignedLessThan:
3866 return less;
3868 case kSignedGreaterThanOrEqual:
3869 return greater_equal;
3871 case kSignedLessThanOrEqual:
3872 return less_equal;
3874 case kSignedGreaterThan:
3875 return greater;
3877 case kUnsignedLessThan:
3878 return below;
3880 case kUnsignedGreaterThanOrEqual:
3881 return above_equal;
3883 case kUnsignedLessThanOrEqual:
3884 return below_equal;
3886 case kUnsignedGreaterThan:
3887 return above;
3889 case kOverflow:
3890 return overflow;
3892 case kNotOverflow:
3893 return no_overflow;
3895 default:
3896 UNREACHABLE();
3898 }
3899 }
3900
3901 // Assembles a branch after an instruction.
3902 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3903 Label::Distance flabel_distance =
3904 branch->fallthru ? Label::kNear : Label::kFar;
3905 Label* tlabel = branch->true_label;
3906 Label* flabel = branch->false_label;
3907 if (branch->condition == kUnorderedEqual) {
3908 __ j(parity_even, flabel, flabel_distance);
3909 } else if (branch->condition == kUnorderedNotEqual) {
3910 __ j(parity_even, tlabel);
3911 }
3912 __ j(FlagsConditionToCondition(branch->condition), tlabel);
3913
3914 // Add a jump if not falling through to the next block.
3915 if (!branch->fallthru) __ jmp(flabel);
3916 }
3917
3918 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3919 Instruction* instr) {
3920 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3921 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3922 return;
3923 }
3924
3925 condition = NegateFlagsCondition(condition);
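// The poison register holds 0 or -1. setcc writes the negated condition
// into its low byte; adding 255 and shifting arithmetically by 31 leaves
// -1 only if the register was -1 and the condition byte was 0.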
3926 __ setcc(FlagsConditionToCondition(condition), kSpeculationPoisonRegister);
3927 __ add(kSpeculationPoisonRegister, Immediate(255));
3928 __ sar(kSpeculationPoisonRegister, 31u);
3929 }
3930
3931 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3932 BranchInfo* branch) {
3933 AssembleArchBranch(instr, branch);
3934 }
3935
3936 void CodeGenerator::AssembleArchJump(RpoNumber target) {
3937 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3938 }
3939
3940 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3941 FlagsCondition condition) {
3942 class OutOfLineTrap final : public OutOfLineCode {
3943 public:
3944 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3945 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
3946
3947 void Generate() final {
3948 IA32OperandConverter i(gen_, instr_);
3949 TrapId trap_id =
3950 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
3951 GenerateCallToTrap(trap_id);
3952 }
3953
3954 private:
3955 void GenerateCallToTrap(TrapId trap_id) {
3956 if (trap_id == TrapId::kInvalid) {
3957 // We cannot test calls to the runtime in cctest/test-run-wasm.
3958 // Therefore we emit a call to C here instead of a call to the runtime.
3959 __ PrepareCallCFunction(0, esi);
3960 __ CallCFunction(
3961 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3962 __ LeaveFrame(StackFrame::WASM_COMPILED);
3963 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
3964 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
3965 // Use ecx as a scratch register; we return immediately anyway.
3966 __ Ret(static_cast<int>(pop_size), ecx);
3967 } else {
3968 gen_->AssembleSourcePosition(instr_);
3969 // A direct call to a wasm runtime stub defined in this module.
3970 // Just encode the stub index. This will be patched at relocation.
3971 __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
3972 ReferenceMap* reference_map =
3973 new (gen_->zone()) ReferenceMap(gen_->zone());
3974 gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3975 Safepoint::kNoLazyDeopt);
3976 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3977 }
3978 }
3979
3980 Instruction* instr_;
3981 CodeGenerator* gen_;
3982 };
3983 auto ool = new (zone()) OutOfLineTrap(this, instr);
3984 Label* tlabel = ool->entry();
3985 Label end;
3986 if (condition == kUnorderedEqual) {
3987 __ j(parity_even, &end);
3988 } else if (condition == kUnorderedNotEqual) {
3989 __ j(parity_even, tlabel);
3990 }
3991 __ j(FlagsConditionToCondition(condition), tlabel);
3992 __ bind(&end);
3993 }
3994
3995 // Assembles boolean materializations after an instruction.
3996 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3997 FlagsCondition condition) {
3998 IA32OperandConverter i(this, instr);
3999 Label done;
4000
4001 // Materialize a full 32-bit 1 or 0 value. The result register is always the
4002 // last output of the instruction.
4003 Label check;
4004 DCHECK_NE(0u, instr->OutputCount());
4005 Register reg = i.OutputRegister(instr->OutputCount() - 1);
4006 if (condition == kUnorderedEqual) {
4007 __ j(parity_odd, &check, Label::kNear);
4008 __ Move(reg, Immediate(0));
4009 __ jmp(&done, Label::kNear);
4010 } else if (condition == kUnorderedNotEqual) {
4011 __ j(parity_odd, &check, Label::kNear);
4012 __ mov(reg, Immediate(1));
4013 __ jmp(&done, Label::kNear);
4014 }
4015 Condition cc = FlagsConditionToCondition(condition);
4016
4017 __ bind(&check);
4018 if (reg.is_byte_register()) {
4019 // setcc for byte registers (al, bl, cl, dl).
4020 __ setcc(cc, reg);
4021 __ movzx_b(reg, reg);
4022 } else {
4023 // Emit a branch to set a register to either 1 or 0.
4024 Label set;
4025 __ j(cc, &set, Label::kNear);
4026 __ Move(reg, Immediate(0));
4027 __ jmp(&done, Label::kNear);
4028 __ bind(&set);
4029 __ mov(reg, Immediate(1));
4030 }
4031 __ bind(&done);
4032 }
4033
4034 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4035 IA32OperandConverter i(this, instr);
4036 Register input = i.InputRegister(0);
4037 std::vector<std::pair<int32_t, Label*>> cases;
4038 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4039 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4040 }
4041 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4042 cases.data() + cases.size());
4043 }
4044
4045 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
4046 IA32OperandConverter i(this, instr);
4047 Register input = i.InputRegister(0);
4048 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4049 __ cmp(input, Immediate(i.InputInt32(index + 0)));
4050 __ j(equal, GetLabel(i.InputRpo(index + 1)));
4051 }
4052 AssembleArchJump(i.InputRpo(1));
4053 }
4054
4055
4056 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4057 IA32OperandConverter i(this, instr);
4058 Register input = i.InputRegister(0);
4059 size_t const case_count = instr->InputCount() - 2;
4060 Label** cases = zone()->NewArray<Label*>(case_count);
4061 for (size_t index = 0; index < case_count; ++index) {
4062 cases[index] = GetLabel(i.InputRpo(index + 2));
4063 }
4064 Label* const table = AddJumpTable(cases, case_count);
4065 __ cmp(input, Immediate(case_count));
4066 __ j(above_equal, GetLabel(i.InputRpo(1)));
4067 __ jmp(Operand::JumpTable(input, times_4, table));
4068 }
4069
4070
4071 // The calling convention for JSFunctions on IA32 passes arguments on the
4072 // stack and the JSFunction and context in EDI and ESI, respectively. The
4073 // steps of a call thus look as follows:
4074
4075 // --{ before the call instruction }--------------------------------------------
4076 // | caller frame |
4077 // ^ esp ^ ebp
4078
4079 // --{ push arguments and setup ESI, EDI }--------------------------------------
4080 // | args + receiver | caller frame |
4081 // ^ esp ^ ebp
4082 // [edi = JSFunction, esi = context]
4083
4084 // --{ call [edi + kCodeEntryOffset] }------------------------------------------
4085 // | RET | args + receiver | caller frame |
4086 // ^ esp ^ ebp
4087
4088 // =={ prologue of called function }============================================
4089 // --{ push ebp }---------------------------------------------------------------
4090 // | FP | RET | args + receiver | caller frame |
4091 // ^ esp ^ ebp
4092
4093 // --{ mov ebp, esp }-----------------------------------------------------------
4094 // | FP | RET | args + receiver | caller frame |
4095 // ^ ebp,esp
4096
4097 // --{ push esi }---------------------------------------------------------------
4098 // | CTX | FP | RET | args + receiver | caller frame |
4099 // ^esp ^ ebp
4100
4101 // --{ push edi }---------------------------------------------------------------
4102 // | FNC | CTX | FP | RET | args + receiver | caller frame |
4103 // ^esp ^ ebp
4104
4105 // --{ subi esp, #N }-----------------------------------------------------------
4106 // | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
4107 // ^esp ^ ebp
4108
4109 // =={ body of called function }================================================
4110
4111 // =={ epilogue of called function }============================================
4112 // --{ mov esp, ebp }-----------------------------------------------------------
4113 // | FP | RET | args + receiver | caller frame |
4114 // ^ esp,ebp
4115
4116 // --{ pop ebp }-----------------------------------------------------------
4117 // | | RET | args + receiver | caller frame |
4118 // ^ esp ^ ebp
4119
4120 // --{ ret #A+1 }-----------------------------------------------------------
4121 // | | caller frame |
4122 // ^ esp ^ ebp
4123
4124 // Runtime function calls are accomplished by doing a stub call to the
4125 // CEntry (a real code object). On IA32 it passes arguments on the
4126 // stack, the number of arguments in EAX, the address of the runtime function
4127 // in EBX, and the context in ESI.
4128
4129 // --{ before the call instruction }--------------------------------------------
4130 // | caller frame |
4131 // ^ esp ^ ebp
4132
4133 // --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
4134 // | args + receiver | caller frame |
4135 // ^ esp ^ ebp
4136 // [eax = #args, ebx = runtime function, esi = context]
4137
4138 // --{ call #CEntry }-----------------------------------------------------------
4139 // | RET | args + receiver | caller frame |
4140 // ^ esp ^ ebp
4141
4142 // =={ body of runtime function }===============================================
4143
4144 // --{ runtime returns }--------------------------------------------------------
4145 // | caller frame |
4146 // ^ esp ^ ebp
4147
4148 // Other custom linkages (e.g. for calling directly into and out of C++) may
4149 // need to save callee-saved registers on the stack, which is done in the
4150 // function prologue of generated code.
4151
4152 // --{ before the call instruction }--------------------------------------------
4153 // | caller frame |
4154 // ^ esp ^ ebp
4155
4156 // --{ set up arguments in registers and on the stack }-------------------------
4157 // | args | caller frame |
4158 // ^ esp ^ ebp
4159 // [r0 = arg0, r1 = arg1, ...]
4160
4161 // --{ call code }--------------------------------------------------------------
4162 // | RET | args | caller frame |
4163 // ^ esp ^ ebp
4164
4165 // =={ prologue of called function }============================================
4166 // --{ push ebp }---------------------------------------------------------------
4167 // | FP | RET | args | caller frame |
4168 // ^ esp ^ ebp
4169
4170 // --{ mov ebp, esp }-----------------------------------------------------------
4171 // | FP | RET | args | caller frame |
4172 // ^ ebp,esp
4173
4174 // --{ save registers }---------------------------------------------------------
4175 // | regs | FP | RET | args | caller frame |
4176 // ^ esp ^ ebp
4177
4178 // --{ subi esp, #N }-----------------------------------------------------------
4179 // | callee frame | regs | FP | RET | args | caller frame |
4180 // ^esp ^ ebp
4181
4182 // =={ body of called function }================================================
4183
4184 // =={ epilogue of called function }============================================
4185 // --{ restore registers }------------------------------------------------------
4186 // | regs | FP | RET | args | caller frame |
4187 // ^ esp ^ ebp
4188
4189 // --{ mov esp, ebp }-----------------------------------------------------------
4190 // | FP | RET | args | caller frame |
4191 // ^ esp,ebp
4192
4193 // --{ pop ebp }----------------------------------------------------------------
4194 // | RET | args | caller frame |
4195 // ^ esp ^ ebp
4196
4197 void CodeGenerator::FinishFrame(Frame* frame) {
4198 auto call_descriptor = linkage()->GetIncomingDescriptor();
4199 const RegList saves = call_descriptor->CalleeSavedRegisters();
4200 if (saves != 0) { // Save callee-saved registers.
4201 DCHECK(!info()->is_osr());
4202 int pushed = 0;
4203 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4204 if (!((1 << i) & saves)) continue;
4205 ++pushed;
4206 }
4207 frame->AllocateSavedCalleeRegisterSlots(pushed);
4208 }
4209 }
4210
4211 void CodeGenerator::AssembleConstructFrame() {
4212 auto call_descriptor = linkage()->GetIncomingDescriptor();
4213 if (frame_access_state()->has_frame()) {
4214 if (call_descriptor->IsCFunctionCall()) {
4215 __ push(ebp);
4216 __ mov(ebp, esp);
4217 } else if (call_descriptor->IsJSFunctionCall()) {
4218 __ Prologue();
4219 if (call_descriptor->PushArgumentCount()) {
4220 __ push(kJavaScriptCallArgCountRegister);
4221 }
4222 } else {
4223 __ StubPrologue(info()->GetOutputStackFrameType());
4224 if (call_descriptor->IsWasmFunctionCall()) {
4225 __ push(kWasmInstanceRegister);
4226 }
4227 }
4228 }
4229
4230 int shrink_slots = frame()->GetTotalFrameSlotCount() -
4231 call_descriptor->CalculateFixedFrameSize();
4232
4233 if (info()->is_osr()) {
4234 // TurboFan OSR-compiled functions cannot be entered directly.
4235 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4236
4237 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4238 // frame is still on the stack. Optimized code uses OSR values directly from
4239 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4240 // remaining stack slots.
4241 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4242 osr_pc_offset_ = __ pc_offset();
4243 shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
4244 ResetSpeculationPoison();
4245 }
4246
4247 const RegList saves = call_descriptor->CalleeSavedRegisters();
4248 if (shrink_slots > 0) {
4249 DCHECK(frame_access_state()->has_frame());
4250 if (info()->IsWasm() && shrink_slots > 128) {
4251 // For WebAssembly functions with big frames we have to do the stack
4252 // overflow check before we construct the frame. Otherwise we may not
4253 // have enough space on the stack to call the runtime for the stack
4254 // overflow.
4255 Label done;
4256
4257 // If the frame is bigger than the stack, we throw the stack overflow
4258 // exception unconditionally. This also lets us avoid the integer-overflow
4259 // check in the condition code below.
4260 if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
4261 Register scratch = esi;
4262 __ push(scratch);
4263 __ mov(scratch,
4264 FieldOperand(kWasmInstanceRegister,
4265 WasmInstanceObject::kRealStackLimitAddressOffset));
4266 __ mov(scratch, Operand(scratch, 0));
4267 __ add(scratch, Immediate(shrink_slots * kPointerSize));
4268 __ cmp(esp, scratch);
4269 __ pop(scratch);
4270 __ j(above_equal, &done);
4271 }
4272 __ mov(ecx, FieldOperand(kWasmInstanceRegister,
4273 WasmInstanceObject::kCEntryStubOffset));
4274 __ Move(esi, Smi::kZero);
4275 __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
4276 ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
4277 RecordSafepoint(reference_map, Safepoint::kSimple, 0,
4278 Safepoint::kNoLazyDeopt);
4279 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4280 __ bind(&done);
4281 }
4282
4283 // Skip callee-saved and return slots, which are created below.
4284 shrink_slots -= base::bits::CountPopulation(saves);
4285 shrink_slots -= frame()->GetReturnSlotCount();
4286 if (shrink_slots > 0) {
4287 __ sub(esp, Immediate(shrink_slots * kPointerSize));
4288 }
4289 }
4290
4291 if (saves != 0) { // Save callee-saved registers.
4292 DCHECK(!info()->is_osr());
4293 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4294 if (((1 << i) & saves)) __ push(Register::from_code(i));
4295 }
4296 }
4297
4298 // Allocate return slots (located after callee-saved).
4299 if (frame()->GetReturnSlotCount() > 0) {
4300 __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
4301 }
4302 }
4303
4304 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
4305 auto call_descriptor = linkage()->GetIncomingDescriptor();
4306
4307 const RegList saves = call_descriptor->CalleeSavedRegisters();
4308 // Restore registers.
4309 if (saves != 0) {
4310 const int returns = frame()->GetReturnSlotCount();
4311 if (returns != 0) {
4312 __ add(esp, Immediate(returns * kPointerSize));
4313 }
4314 for (int i = 0; i < Register::kNumRegisters; i++) {
4315 if (!((1 << i) & saves)) continue;
4316 __ pop(Register::from_code(i));
4317 }
4318 }
4319
4320 // Might need ecx for scratch if pop_size is too big or if there is a variable
4321 // pop count.
4322 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4323 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
4324 IA32OperandConverter g(this, nullptr);
4325 if (call_descriptor->IsCFunctionCall()) {
4326 AssembleDeconstructFrame();
4327 } else if (frame_access_state()->has_frame()) {
4328 // For now, canonicalize JSFunction return sites if they always have the
4329 // same number of return args.
4330 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
4331 if (return_label_.is_bound()) {
4332 __ jmp(&return_label_);
4333 return;
4334 } else {
4335 __ bind(&return_label_);
4336 AssembleDeconstructFrame();
4337 }
4338 } else {
4339 AssembleDeconstructFrame();
4340 }
4341 }
4342 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
4343 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4344 if (pop->IsImmediate()) {
4345 DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
4346 pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
4347 __ Ret(static_cast<int>(pop_size), ecx);
4348 } else {
4349 Register pop_reg = g.ToRegister(pop);
4350 Register scratch_reg = pop_reg == ecx ? edx : ecx;
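// Pop the return address into the scratch register, drop the stack
// arguments with lea, and return through an indirect jump.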
4351 __ pop(scratch_reg);
4352 __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
4353 __ jmp(scratch_reg);
4354 }
4355 }
4356
4357 void CodeGenerator::FinishCode() {}
4358
4359 void CodeGenerator::AssembleMove(InstructionOperand* source,
4360 InstructionOperand* destination) {
4361 IA32OperandConverter g(this, nullptr);
4362 // Dispatch on the source and destination operand kinds.
4363 switch (MoveType::InferMove(source, destination)) {
4364 case MoveType::kRegisterToRegister:
4365 if (source->IsRegister()) {
4366 __ mov(g.ToRegister(destination), g.ToRegister(source));
4367 } else {
4368 DCHECK(source->IsFPRegister());
4369 __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4370 }
4371 return;
4372 case MoveType::kRegisterToStack: {
4373 Operand dst = g.ToOperand(destination);
4374 if (source->IsRegister()) {
4375 __ mov(dst, g.ToRegister(source));
4376 } else {
4377 DCHECK(source->IsFPRegister());
4378 XMMRegister src = g.ToDoubleRegister(source);
4379 MachineRepresentation rep =
4380 LocationOperand::cast(source)->representation();
4381 if (rep == MachineRepresentation::kFloat32) {
4382 __ movss(dst, src);
4383 } else if (rep == MachineRepresentation::kFloat64) {
4384 __ movsd(dst, src);
4385 } else {
4386 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4387 __ movups(dst, src);
4388 }
4389 }
4390 return;
4391 }
4392 case MoveType::kStackToRegister: {
4393 Operand src = g.ToOperand(source);
4394 if (source->IsStackSlot()) {
4395 __ mov(g.ToRegister(destination), src);
4396 } else {
4397 DCHECK(source->IsFPStackSlot());
4398 XMMRegister dst = g.ToDoubleRegister(destination);
4399 MachineRepresentation rep =
4400 LocationOperand::cast(source)->representation();
4401 if (rep == MachineRepresentation::kFloat32) {
4402 __ movss(dst, src);
4403 } else if (rep == MachineRepresentation::kFloat64) {
4404 __ movsd(dst, src);
4405 } else {
4406 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4407 __ movups(dst, src);
4408 }
4409 }
4410 return;
4411 }
4412 case MoveType::kStackToStack: {
4413 Operand src = g.ToOperand(source);
4414 Operand dst = g.ToOperand(destination);
4415 if (source->IsStackSlot()) {
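// Move the word through the stack with a push/pop pair; no scratch GPR
// is needed.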
4416 __ push(src);
4417 __ pop(dst);
4418 } else {
4419 MachineRepresentation rep =
4420 LocationOperand::cast(source)->representation();
4421 if (rep == MachineRepresentation::kFloat32) {
4422 __ movss(kScratchDoubleReg, src);
4423 __ movss(dst, kScratchDoubleReg);
4424 } else if (rep == MachineRepresentation::kFloat64) {
4425 __ movsd(kScratchDoubleReg, src);
4426 __ movsd(dst, kScratchDoubleReg);
4427 } else {
4428 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4429 __ movups(kScratchDoubleReg, src);
4430 __ movups(dst, kScratchDoubleReg);
4431 }
4432 }
4433 return;
4434 }
4435 case MoveType::kConstantToRegister: {
4436 Constant src = g.ToConstant(source);
4437 if (destination->IsRegister()) {
4438 Register dst = g.ToRegister(destination);
4439 if (src.type() == Constant::kHeapObject) {
4440 __ Move(dst, src.ToHeapObject());
4441 } else {
4442 __ Move(dst, g.ToImmediate(source));
4443 }
4444 } else {
4445 DCHECK(destination->IsFPRegister());
4446 XMMRegister dst = g.ToDoubleRegister(destination);
4447 if (src.type() == Constant::kFloat32) {
4448 // TODO(turbofan): Can we do better here?
4449 __ Move(dst, src.ToFloat32AsInt());
4450 } else {
4451 DCHECK_EQ(src.type(), Constant::kFloat64);
4452 __ Move(dst, src.ToFloat64().AsUint64());
4453 }
4454 }
4455 return;
4456 }
4457 case MoveType::kConstantToStack: {
4458 Constant src = g.ToConstant(source);
4459 Operand dst = g.ToOperand(destination);
4460 if (destination->IsStackSlot()) {
4461 if (src.type() == Constant::kHeapObject) {
4462 __ mov(dst, src.ToHeapObject());
4463 } else {
4464 __ Move(dst, g.ToImmediate(source));
4465 }
4466 } else {
4467 DCHECK(destination->IsFPStackSlot());
4468 if (src.type() == Constant::kFloat32) {
4469 __ Move(dst, Immediate(src.ToFloat32AsInt()));
4470 } else {
4471 DCHECK_EQ(src.type(), Constant::kFloat64);
4472 uint64_t constant_value = src.ToFloat64().AsUint64();
4473 uint32_t lower = static_cast<uint32_t>(constant_value);
4474 uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
4475 Operand dst0 = dst;
4476 Operand dst1 = g.ToOperand(destination, kPointerSize);
4477 __ Move(dst0, Immediate(lower));
4478 __ Move(dst1, Immediate(upper));
4479 }
4480 }
4481 return;
4482 }
4483 }
4484 UNREACHABLE();
4485 }
4486
4487
4488 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4489 InstructionOperand* destination) {
4490 IA32OperandConverter g(this, nullptr);
4491 // Dispatch on the source and destination operand kinds. Not all
4492 // combinations are possible.
4493 switch (MoveType::InferSwap(source, destination)) {
4494 case MoveType::kRegisterToRegister: {
4495 if (source->IsRegister()) {
4496 Register src = g.ToRegister(source);
4497 Register dst = g.ToRegister(destination);
4498 __ push(src);
4499 __ mov(src, dst);
4500 __ pop(dst);
4501 } else {
4502 DCHECK(source->IsFPRegister());
4503 XMMRegister src = g.ToDoubleRegister(source);
4504 XMMRegister dst = g.ToDoubleRegister(destination);
4505 __ movaps(kScratchDoubleReg, src);
4506 __ movaps(src, dst);
4507 __ movaps(dst, kScratchDoubleReg);
4508 }
4509 return;
4510 }
4511 case MoveType::kRegisterToStack: {
4512 if (source->IsRegister()) {
4513 Register src = g.ToRegister(source);
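// The destination may be an esp-relative slot, so the operand is
// recomputed after each stack pointer adjustment.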
4514 __ push(src);
4515 frame_access_state()->IncreaseSPDelta(1);
4516 Operand dst = g.ToOperand(destination);
4517 __ mov(src, dst);
4518 frame_access_state()->IncreaseSPDelta(-1);
4519 dst = g.ToOperand(destination);
4520 __ pop(dst);
4521 } else {
4522 DCHECK(source->IsFPRegister());
4523 XMMRegister src = g.ToDoubleRegister(source);
4524 Operand dst = g.ToOperand(destination);
4525 MachineRepresentation rep =
4526 LocationOperand::cast(source)->representation();
4527 if (rep == MachineRepresentation::kFloat32) {
4528 __ movss(kScratchDoubleReg, dst);
4529 __ movss(dst, src);
4530 __ movaps(src, kScratchDoubleReg);
4531 } else if (rep == MachineRepresentation::kFloat64) {
4532 __ movsd(kScratchDoubleReg, dst);
4533 __ movsd(dst, src);
4534 __ movaps(src, kScratchDoubleReg);
4535 } else {
4536 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4537 __ movups(kScratchDoubleReg, dst);
4538 __ movups(dst, src);
4539 __ movups(src, kScratchDoubleReg);
4540 }
4541 }
4542 return;
4543 }
4544 case MoveType::kStackToStack: {
4545 if (source->IsStackSlot()) {
4546 Operand dst1 = g.ToOperand(destination);
4547 __ push(dst1);
4548 frame_access_state()->IncreaseSPDelta(1);
4549 Operand src1 = g.ToOperand(source);
4550 __ push(src1);
4551 Operand dst2 = g.ToOperand(destination);
4552 __ pop(dst2);
4553 frame_access_state()->IncreaseSPDelta(-1);
4554 Operand src2 = g.ToOperand(source);
4555 __ pop(src2);
4556 } else {
4557 DCHECK(source->IsFPStackSlot());
4558 Operand src0 = g.ToOperand(source);
4559 Operand dst0 = g.ToOperand(destination);
4560 MachineRepresentation rep =
4561 LocationOperand::cast(source)->representation();
4562 if (rep == MachineRepresentation::kFloat32) {
4563 __ movss(kScratchDoubleReg, dst0); // Save dst in scratch register.
4564 __ push(src0); // Then use stack to copy src to destination.
4565 __ pop(dst0);
4566 __ movss(src0, kScratchDoubleReg);
4567 } else if (rep == MachineRepresentation::kFloat64) {
4568 __ movsd(kScratchDoubleReg, dst0); // Save dst in scratch register.
4569 __ push(src0); // Then use stack to copy src to destination.
4570 __ pop(dst0);
4571 __ push(g.ToOperand(source, kPointerSize));
4572 __ pop(g.ToOperand(destination, kPointerSize));
4573 __ movsd(src0, kScratchDoubleReg);
4574 } else {
4575 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4576 __ movups(kScratchDoubleReg, dst0); // Save dst in scratch register.
4577 __ push(src0); // Then use stack to copy src to destination.
4578 __ pop(dst0);
4579 __ push(g.ToOperand(source, kPointerSize));
4580 __ pop(g.ToOperand(destination, kPointerSize));
4581 __ push(g.ToOperand(source, 2 * kPointerSize));
4582 __ pop(g.ToOperand(destination, 2 * kPointerSize));
4583 __ push(g.ToOperand(source, 3 * kPointerSize));
4584 __ pop(g.ToOperand(destination, 3 * kPointerSize));
4585 __ movups(src0, kScratchDoubleReg);
4586 }
4587 }
4588 return;
4589 }
4590 default:
4591 UNREACHABLE();
4592 break;
4593 }
4594 }
4595
4596
4597 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4598 for (size_t index = 0; index < target_count; ++index) {
4599 __ dd(targets[index]);
4600 }
4601 }
4602
4603 #undef __
4604 #undef kScratchDoubleReg
4605 #undef ASSEMBLE_COMPARE
4606 #undef ASSEMBLE_IEEE754_BINOP
4607 #undef ASSEMBLE_IEEE754_UNOP
4608 #undef ASSEMBLE_BINOP
4609 #undef ASSEMBLE_ATOMIC_BINOP
4610 #undef ASSEMBLE_I64ATOMIC_BINOP
4611 #undef ASSEMBLE_MOVX
4612 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4613 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4614
4615 } // namespace compiler
4616 } // namespace internal
4617 } // namespace v8
4618