// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/base/overflowing-math.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/callable.h"
#include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/execution/frame-constants.h"
#include "src/execution/frames.h"
#include "src/heap/memory-chunk.h"
#include "src/objects/smi.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

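// xmm0 serves as the scratch double register; it is excluded from register
// allocation on ia32, so the code generator may clobber it freely.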
#define kScratchDoubleReg xmm0

// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
 public:
  IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    if (op->IsRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToRegister(op));
    } else if (op->IsFPRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToDoubleRegister(op));
    }
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return Operand(offset.from_stack_pointer() ? esp : ebp,
                   offset.offset() + extra);
  }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kInt32 &&
        RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(static_cast<Address>(constant.ToInt32()),
                       constant.rmode());
    }
    switch (constant.type()) {
      case Constant::kInt32:
        return Immediate(constant.ToInt32());
      case Constant::kFloat32:
        return Immediate::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Immediate::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Immediate(constant.ToExternalReference());
      case Constant::kHeapObject:
        return Immediate(constant.ToHeapObject());
      case Constant::kCompressedHeapObject:
        break;
      case Constant::kDelayedStringConstant:
        return Immediate::EmbeddedStringConstant(
            constant.ToDelayedStringConstant());
      case Constant::kInt64:
        break;
      case Constant::kRpoNumber:
        return Immediate::CodeRelativeOffset(ToLabel(operand));
    }
    UNREACHABLE();
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

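  // Decodes the addressing mode encoded in the instruction's opcode and
  // builds the corresponding IA-32 Operand, consuming instruction inputs
  // starting at *offset.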
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_M1:
      case kMode_M2:
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MI: {
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(ctant.ToInt32(), ctant.rmode());
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }

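  // Returns an operand addressing the high 32-bit word (displacement 4) of
  // the 64-bit memory location referenced by the instruction; used by the
  // 64-bit atomic pair operations below.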
  Operand NextMemoryOperand(size_t offset = 0) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    Register base = InputRegister(NextOffset(&offset));
    const int32_t disp = 4;
    if (mode == kMode_MR1) {
      Register index = InputRegister(NextOffset(&offset));
      ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
      return Operand(base, index, scale, disp);
    } else if (mode == kMode_MRI) {
      Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
      return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
    } else {
      UNREACHABLE();
    }
  }

  void MoveInstructionOperandToRegister(Register destination,
                                        InstructionOperand* op) {
    if (op->IsImmediate() || op->IsConstant()) {
      gen_->tasm()->mov(destination, ToImmediate(op));
    } else if (op->IsRegister()) {
      gen_->tasm()->Move(destination, ToRegister(op));
    } else {
      gen_->tasm()->mov(destination, ToOperand(op));
    }
  }
};

namespace {

bool HasAddressingMode(Instruction* instr) {
  return instr->addressing_mode() != kMode_None;
}

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

bool HasRegisterInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsRegister();
}

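// Out-of-line paths that materialize a quiet NaN (0.0 / 0.0) in the result
// register; the float min/max sequences below jump here when ucomis* reports
// an unordered (NaN) comparison.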
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    __ movsd(MemOperand(esp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else if (tasm()->options().inline_offheap_trampolines) {
      __ CallBuiltin(Builtins::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};

}  // namespace

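// The ASSEMBLE_* macros below dispatch on the operand shapes chosen by the
// instruction selector (memory, immediate, or register) and emit the matching
// ia32 encoding. They rely on the locals `instr` and `i` (the operand
// converter) defined in AssembleArchInstruction.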
#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (0)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    /* Pass two doubles as arguments on the stack. */                    \
    __ PrepareCallCFunction(4, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ AllocateStackSpace(kDoubleSize);                                  \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    /* Pass one double as argument on the stack. */                      \
    __ PrepareCallCFunction(2, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ AllocateStackSpace(kDoubleSize);                                  \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                             \
  do {                                                        \
    if (HasAddressingMode(instr)) {                           \
      size_t index = 1;                                       \
      Operand right = i.MemoryOperand(&index);                \
      __ asm_instr(i.InputRegister(0), right);                \
    } else {                                                  \
      if (HasImmediateInput(instr, 1)) {                      \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
      } else {                                                \
        __ asm_instr(i.InputRegister(0), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (0)

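// Emits a compare-exchange loop for a 32-bit (or narrower) atomic binop:
// load the old value into eax, compute the updated value in a temp, then
// lock cmpxchg and retry if another writer touched the location in between.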
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(eax, i.MemoryOperand(1));                       \
    __ Move(i.TempRegister(0), eax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

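// 64-bit variant of the loop above. cmpxchg8b compares edx:eax against the
// memory operand and, on success, stores ecx:ebx, so ebx is saved and
// restored (with a matching SP-delta adjustment) around the operation.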
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)                \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov(eax, i.MemoryOperand(2));                            \
    __ mov(edx, i.NextMemoryOperand(2));                        \
    __ push(ebx);                                               \
    frame_access_state()->IncreaseSPDelta(1);                   \
    i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
    __ push(i.InputRegister(1));                                \
    __ instr1(ebx, eax);                                        \
    __ instr2(i.InputRegister(1), edx);                         \
    __ lock();                                                  \
    __ cmpxchg8b(i.MemoryOperand(2));                           \
    __ pop(i.InputRegister(1));                                 \
    __ pop(ebx);                                                \
    frame_access_state()->IncreaseSPDelta(-1);                  \
    __ j(not_equal, &binop);                                    \
  } while (false)

#define ASSEMBLE_MOVX(mov_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (0)

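// SIMD helpers: prefer the non-destructive three-operand AVX form when
// available; otherwise fall back to SSE, whose destructive two-operand form
// requires dst == src0.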
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
  do {                                                               \
    XMMRegister src0 = i.InputSimd128Register(0);                    \
    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    if (CpuFeatures::IsSupported(AVX)) {                             \
      CpuFeatureScope avx_scope(tasm(), AVX);                        \
      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
    } else {                                                         \
      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
      __ opcode(i.OutputSimd128Register(), src1);                    \
    }                                                                \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  if (CpuFeatures::IsSupported(AVX)) {                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                 i.InputOperand(1), imm);                              \
  } else {                                                             \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
  }

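// AllTrue: dst = 1 iff every lane of src is nonzero. The lanewise compare
// against zero (opcode is a pcmpeq variant) produces all-ones exactly in the
// lanes that were zero, so ptest sets ZF, and the cmov fires, iff no such
// lane exists.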
#define ASSEMBLE_SIMD_ALL_TRUE(opcode)               \
  do {                                               \
    Register dst = i.OutputRegister();               \
    Operand src = i.InputOperand(0);                 \
    Register tmp = i.TempRegister(0);                \
    XMMRegister tmp_simd = i.TempSimd128Register(1); \
    __ mov(tmp, Immediate(1));                       \
    __ xor_(dst, dst);                               \
    __ Pxor(tmp_simd, tmp_simd);                     \
    __ opcode(tmp_simd, src);                        \
    __ Ptest(tmp_simd, tmp_simd);                    \
    __ cmov(zero, dst, tmp);                         \
  } while (false)

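// Emits a SIMD shift. Register shift counts are masked to the lane width,
// matching the wasm semantics of shifts modulo the lane size.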
#define ASSEMBLE_SIMD_SHIFT(opcode, width)             \
  do {                                                 \
    XMMRegister dst = i.OutputSimd128Register();       \
    DCHECK_EQ(dst, i.InputSimd128Register(0));         \
    if (HasImmediateInput(instr, 1)) {                 \
      __ opcode(dst, dst, byte{i.InputInt##width(1)}); \
    } else {                                           \
      XMMRegister tmp = i.TempSimd128Register(0);      \
      Register tmp_shift = i.TempRegister(1);          \
      constexpr int mask = (1 << width) - 1;           \
      __ mov(tmp_shift, i.InputRegister(1));           \
      __ and_(tmp_shift, Immediate(mask));             \
      __ Movd(tmp, tmp_shift);                         \
      __ opcode(dst, dst, tmp);                        \
    }                                                  \
  } while (false)

void CodeGenerator::AssembleDeconstructFrame() {
  __ mov(esp, ebp);
  __ pop(ebp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ mov(ebp, MemOperand(ebp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register, Register,
                                                     Register) {
  // There are not enough temp registers left on ia32 for a call instruction
  // so we pick some scratch registers and save/restore them manually here.
  int scratch_count = 3;
  Register scratch1 = esi;
  Register scratch2 = ecx;
  Register scratch3 = edx;
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
         Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  __ push(scratch1);
  __ push(scratch2);
  __ push(scratch3);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ mov(caller_args_count_reg,
         Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3,
                        scratch_count);
  __ pop(scratch3);
  __ pop(scratch2);
  __ pop(scratch1);

  __ bind(&done);
}

namespace {

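// Grows (or, when allowed, shrinks) the stack so that the first unused slot
// above the stack pointer matches new_slot_above_sp, keeping the frame
// access state's SP delta in sync.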
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    tasm->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->add(esp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

#ifdef DEBUG
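// Checks that the outputs and temps of an atomic pair instruction occupy
// eax and edx, as required by the cmpxchg8b-based implementations.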
bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
                                   const Instruction* instr) {
  if (instr->OutputCount() == 2) {
    return (converter->OutputRegister(0) == eax &&
            converter->OutputRegister(1) == edx);
  }
  if (instr->OutputCount() == 1) {
    return (converter->OutputRegister(0) == eax &&
            converter->TempRegister(0) == edx) ||
           (converter->OutputRegister(0) == edx &&
            converter->TempRegister(0) == eax);
  }
  DCHECK_EQ(instr->OutputCount(), 0);
  return (converter->TempRegister(0) == eax &&
          converter->TempRegister(1) == edx);
}
#endif

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    IA32OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ ComputeCodeStartAddress(eax);
  __ cmp(eax, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
  __ pop(eax);  // Restore eax.
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ pop(eax);  // Restore eax.

  Label skip;
  __ j(zero, &skip, Label::kNear);
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET);
  __ bind(&skip);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
  UNREACHABLE();
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
  UNREACHABLE();
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  IA32OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      InstructionOperand* op = instr->InputAt(0);
      if (op->IsImmediate()) {
        Handle<Code> code = i.InputCode(0);
        __ Call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!HasImmediateInput(instr, 0));
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ wasm_call(wasm_code, constant.rmode());
        } else {
          if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
            __ RetpolineCall(wasm_code, constant.rmode());
          } else {
            __ call(wasm_code, constant.rmode());
          }
        }
      } else {
        Register reg = i.InputRegister(0);
        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         no_reg, no_reg, no_reg);
      }
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Jump(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt32());
        __ jmp(wasm_code, constant.rmode());
      } else {
        Register reg = i.InputRegister(0);
        if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
        __ RetpolineJump(reg);
      } else {
        __ jmp(reg);
      }
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
      __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(ecx);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      Label return_location;
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        Register scratch = eax;
        __ push(scratch);
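        // Derive the absolute address of return_location: PushPC pushes the
        // current pc, from which we subtract that pc's offset within the
        // code object and then add the label's code-relative offset.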
        __ PushPC();
        int pc = __ pc_offset();
        __ pop(scratch);
        __ sub(scratch, Immediate(pc + Code::kHeaderSize - kHeapObjectTag));
        __ add(scratch, Immediate::CodeRelativeOffset(&return_location));
        __ mov(MemOperand(ebp, WasmExitFrameConstants::kCallingPCOffset),
               scratch);
        __ pop(scratch);
      }
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      __ bind(&return_location);
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be stricter alignment requirements, causing the old
      // SP to be saved on the stack. In those cases, we cannot calculate the
      // SP delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
      break;
    case kArchAbortCSAAssert:
      DCHECK(i.InputRegister(0) == edx);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(
            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
            RelocInfo::CODE_TARGET);
      }
      __ int3();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchNop:
    case kArchThrowTerminator:
      // Don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      __ jmp(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), ebp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ mov(i.OutputRegister(), Operand(ebp, 0));
      } else {
        __ mov(i.OutputRegister(), ebp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.
      Register lhs_register = esp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = i.TempRegister(0);
        __ lea(lhs_register, Operand(esp, -1 * static_cast<int32_t>(offset)));
      }

      constexpr size_t kValueIndex = 0;
      if (HasAddressingMode(instr)) {
        __ cmp(lhs_register, i.MemoryOperand(kValueIndex));
      } else {
        __ cmp(lhs_register, i.InputRegister(kValueIndex));
      }
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
          this, result, input, DetermineStubCallMode());
      __ cvttsd2si(result, Operand(input));
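      // cvttsd2si returns 0x80000000 (INT32_MIN) on overflow or NaN;
      // "cmp result, 1" sets the overflow flag for exactly that value, so
      // the out-of-line slow path runs only when the fast conversion failed.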
      __ cmp(result, 1);
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand,
                                                   value, scratch0, scratch1,
                                                   mode,
                                                   DetermineStubCallMode());
      __ mov(operand, value);
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? esp : ebp;
      __ lea(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kIA32Add:
      ASSEMBLE_BINOP(add);
      break;
    case kIA32And:
      ASSEMBLE_BINOP(and_);
      break;
    case kIA32Cmp:
      ASSEMBLE_COMPARE(cmp);
      break;
    case kIA32Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kIA32Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kIA32Test:
      ASSEMBLE_COMPARE(test);
      break;
    case kIA32Test16:
      ASSEMBLE_COMPARE(test_w);
      break;
    case kIA32Test8:
      ASSEMBLE_COMPARE(test_b);
      break;
    case kIA32Imul:
      if (HasImmediateInput(instr, 1)) {
        __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
      } else {
        __ imul(i.OutputRegister(), i.InputOperand(1));
      }
      break;
    case kIA32ImulHigh:
      __ imul(i.InputRegister(1));
      break;
    case kIA32UmulHigh:
      __ mul(i.InputRegister(1));
      break;
    case kIA32Idiv:
      __ cdq();
      __ idiv(i.InputOperand(1));
      break;
    case kIA32Udiv:
      __ Move(edx, Immediate(0));
      __ div(i.InputOperand(1));
      break;
    case kIA32Not:
      __ not_(i.OutputOperand());
      break;
    case kIA32Neg:
      __ neg(i.OutputOperand());
      break;
    case kIA32Or:
      ASSEMBLE_BINOP(or_);
      break;
    case kIA32Xor:
      ASSEMBLE_BINOP(xor_);
      break;
    case kIA32Sub:
      ASSEMBLE_BINOP(sub);
      break;
    case kIA32Shl:
      if (HasImmediateInput(instr, 1)) {
        __ shl(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shl_cl(i.OutputOperand());
      }
      break;
    case kIA32Shr:
      if (HasImmediateInput(instr, 1)) {
        __ shr(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ shr_cl(i.OutputOperand());
      }
      break;
    case kIA32Sar:
      if (HasImmediateInput(instr, 1)) {
        __ sar(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ sar_cl(i.OutputOperand());
      }
      break;
    case kIA32AddPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if ((HasRegisterInput(instr, 1) &&
           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for adc. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ add(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ add(i.OutputRegister(0), i.InputRegister(2));
      }
      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
                                         instr->InputAt(1));
      __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
    case kIA32SubPair: {
      // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
      // i.InputRegister(1) ... left high word.
      // i.InputRegister(2) ... right low word.
      // i.InputRegister(3) ... right high word.
      bool use_temp = false;
      if ((HasRegisterInput(instr, 1) &&
           i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
          i.OutputRegister(0).code() == i.InputRegister(3).code()) {
        // We cannot write to the output register directly, because it would
        // overwrite an input for sbb. We have to use the temp register.
        use_temp = true;
        __ Move(i.TempRegister(0), i.InputRegister(0));
        __ sub(i.TempRegister(0), i.InputRegister(2));
      } else {
        __ sub(i.OutputRegister(0), i.InputRegister(2));
      }
      i.MoveInstructionOperandToRegister(i.OutputRegister(1),
                                         instr->InputAt(1));
      __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
      if (use_temp) {
        __ Move(i.OutputRegister(0), i.TempRegister(0));
      }
      break;
    }
    case kIA32MulPair: {
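      // 64-bit multiply: the high word is the sum of the two cross products
      // plus the high half of the low-word product. TempRegister(0) is
      // allocated to edx by the instruction selector, so after the final mul
      // (which leaves the low-word product in edx:eax) the last add picks up
      // its high half.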
      __ imul(i.OutputRegister(1), i.InputOperand(0));
      i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
      __ imul(i.TempRegister(0), i.InputOperand(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));
      __ mov(i.OutputRegister(0), i.InputOperand(0));
      // Multiplies the low words and stores them in eax and edx.
      __ mul(i.InputRegister(2));
      __ add(i.OutputRegister(1), i.TempRegister(0));

      break;
    }
    case kIA32ShlPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32ShrPair:
      if (HasImmediateInput(instr, 2)) {
        __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32SarPair:
      if (HasImmediateInput(instr, 2)) {
        __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
      } else {
        // Shift has been loaded into CL by the register allocator.
        __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
      }
      break;
    case kIA32Rol:
      if (HasImmediateInput(instr, 1)) {
        __ rol(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ rol_cl(i.OutputOperand());
      }
      break;
    case kIA32Ror:
      if (HasImmediateInput(instr, 1)) {
        __ ror(i.OutputOperand(), i.InputInt5(1));
      } else {
        __ ror_cl(i.OutputOperand());
      }
      break;
    case kIA32Lzcnt:
      __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Tzcnt:
      __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Popcnt:
      __ Popcnt(i.OutputRegister(), i.InputOperand(0));
      break;
    case kIA32Bswap:
      __ bswap(i.OutputRegister());
      break;
    case kArchWordPoisonOnSpeculation:
      // TODO(860429): Remove remaining poisoning infrastructure on ia32.
      UNREACHABLE();
    case kIA32MFence:
      __ mfence();
      break;
    case kIA32LFence:
      __ lfence();
      break;
    case kSSEFloat32Cmp:
      __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Add:
      __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Sub:
      __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Mul:
      __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat32Div:
      __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat32Sqrt:
      __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
      break;
    case kSSEFloat32Abs: {
      // TODO(bmeurer): Use 128-bit constants.
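      // pcmpeqd/psrlq materializes the 0x7FFFFFFF single-precision abs mask
      // (in the low lane) without a memory load; andps then clears the sign
      // bit.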
      XMMRegister tmp = i.TempSimd128Register(0);
      __ pcmpeqd(tmp, tmp);
      __ psrlq(tmp, 33);
      __ andps(i.OutputDoubleRegister(), tmp);
      break;
    }
    case kSSEFloat32Neg: {
      // TODO(bmeurer): Use 128-bit constants.
      XMMRegister tmp = i.TempSimd128Register(0);
      __ pcmpeqd(tmp, tmp);
      __ psllq(tmp, 31);
      __ xorps(i.OutputDoubleRegister(), tmp);
      break;
    }
    case kSSEFloat32Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kSSEFloat64Cmp:
      __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Add:
      __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Sub:
      __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Mul:
      __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
      break;
    case kSSEFloat64Div:
      __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat32Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat32Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(kScratchDoubleReg, i.InputOperand(1));
        __ movmskps(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(kScratchDoubleReg, i.InputOperand(1));
        __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
      }
      __ test(i.TempRegister(0), Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Mod: {
      Register tmp = i.TempRegister(1);
      __ mov(tmp, esp);
      __ AllocateStackSpace(kDoubleSize);
      __ and_(esp, -8);  // align to 8 byte boundary.
      // Move values to st(0) and st(1).
      __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(esp, 0));
      __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(esp, 0));
      // Loop while fprem isn't done.
      Label mod_loop;
      __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating point control word is set to ignore them all.
      __ fprem();
      // fnstsw_ax clobbers eax.
      DCHECK_EQ(eax, i.TempRegister(0));
      __ fnstsw_ax();
      __ sahf();
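      // sahf copies the FPU status bits (via AH) into EFLAGS; the C2 bit,
      // which signals an incomplete reduction, lands in the parity flag.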
1464 __ j(parity_even, &mod_loop);
1465 // Move output to stack and clean up.
1466 __ fstp(1);
1467 __ fstp_d(Operand(esp, 0));
1468 __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1469 __ mov(esp, tmp);
1470 break;
1471 }
1472 case kSSEFloat64Abs: {
1473 // TODO(bmeurer): Use 128-bit constants.
1474 XMMRegister tmp = i.TempSimd128Register(0);
1475 __ pcmpeqd(tmp, tmp);
1476 __ psrlq(tmp, 1);
1477 __ andpd(i.OutputDoubleRegister(), tmp);
1478 break;
1479 }
1480 case kSSEFloat64Neg: {
1481 // TODO(bmeurer): Use 128-bit constants.
1482 XMMRegister tmp = i.TempSimd128Register(0);
1483 __ pcmpeqd(tmp, tmp);
1484 __ psllq(tmp, 63);
1485 __ xorpd(i.OutputDoubleRegister(), tmp);
1486 break;
1487 }
1488 case kSSEFloat64Sqrt:
1489 __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1490 break;
1491 case kSSEFloat64Round: {
1492 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1493 RoundingMode const mode =
1494 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1495 __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1496 break;
1497 }
1498 case kSSEFloat32ToFloat64:
1499 __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1500 break;
1501 case kSSEFloat64ToFloat32:
1502 __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1503 break;
1504 case kSSEFloat32ToInt32:
1505 __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
1506 break;
1507 case kSSEFloat32ToUint32:
1508 __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
1509 i.TempSimd128Register(0));
1510 break;
1511 case kSSEFloat64ToInt32:
1512 __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1513 break;
1514 case kSSEFloat64ToUint32:
1515 __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
1516 i.TempSimd128Register(0));
1517 break;
1518 case kSSEInt32ToFloat32:
1519 __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1520 break;
1521 case kSSEUint32ToFloat32:
1522 __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1523 i.TempRegister(0));
1524 break;
1525 case kSSEInt32ToFloat64:
1526 __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1527 break;
1528 case kSSEUint32ToFloat64:
1529 __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
1530 i.TempRegister(0));
1531 break;
1532 case kSSEFloat64ExtractLowWord32:
1533 if (instr->InputAt(0)->IsFPStackSlot()) {
1534 __ mov(i.OutputRegister(), i.InputOperand(0));
1535 } else {
1536 __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1537 }
1538 break;
1539 case kSSEFloat64ExtractHighWord32:
1540 if (instr->InputAt(0)->IsFPStackSlot()) {
1541 __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1542 } else {
1543 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1544 }
1545 break;
1546 case kSSEFloat64InsertLowWord32:
1547 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1548 break;
1549 case kSSEFloat64InsertHighWord32:
1550 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1551 break;
1552 case kSSEFloat64LoadLowWord32:
1553 __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
1554 break;
1555 case kAVXFloat32Add: {
1556 CpuFeatureScope avx_scope(tasm(), AVX);
1557 __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1558 i.InputOperand(1));
1559 break;
1560 }
1561 case kAVXFloat32Sub: {
1562 CpuFeatureScope avx_scope(tasm(), AVX);
1563 __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1564 i.InputOperand(1));
1565 break;
1566 }
1567 case kAVXFloat32Mul: {
1568 CpuFeatureScope avx_scope(tasm(), AVX);
1569 __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1570 i.InputOperand(1));
1571 break;
1572 }
1573 case kAVXFloat32Div: {
1574 CpuFeatureScope avx_scope(tasm(), AVX);
1575 __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1576 i.InputOperand(1));
1577 // Don't delete this mov. It may improve performance on some CPUs,
1578 // when there is a (v)mulss depending on the result.
1579 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1580 break;
1581 }
1582 case kAVXFloat64Add: {
1583 CpuFeatureScope avx_scope(tasm(), AVX);
1584 __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1585 i.InputOperand(1));
1586 break;
1587 }
1588 case kAVXFloat64Sub: {
1589 CpuFeatureScope avx_scope(tasm(), AVX);
1590 __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1591 i.InputOperand(1));
1592 break;
1593 }
1594 case kAVXFloat64Mul: {
1595 CpuFeatureScope avx_scope(tasm(), AVX);
1596 __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1597 i.InputOperand(1));
1598 break;
1599 }
1600 case kAVXFloat64Div: {
1601 CpuFeatureScope avx_scope(tasm(), AVX);
1602 __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1603 i.InputOperand(1));
1604 // Don't delete this mov. It may improve performance on some CPUs,
1605 // when there is a (v)mulsd depending on the result.
1606 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1607 break;
1608 }
1609 case kAVXFloat32Abs: {
1610 // TODO(bmeurer): Use RIP relative 128-bit constants.
1611 XMMRegister tmp = i.TempSimd128Register(0);
1612 __ pcmpeqd(tmp, tmp);
1613 __ psrlq(tmp, 33);
1614 CpuFeatureScope avx_scope(tasm(), AVX);
1615 __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1616 break;
1617 }
1618 case kAVXFloat32Neg: {
1619 // TODO(bmeurer): Use RIP relative 128-bit constants.
1620 XMMRegister tmp = i.TempSimd128Register(0);
1621 __ pcmpeqd(tmp, tmp);
1622 __ psllq(tmp, 31);
1623 CpuFeatureScope avx_scope(tasm(), AVX);
1624 __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1625 break;
1626 }
1627 case kAVXFloat64Abs: {
1628 // TODO(bmeurer): Use RIP relative 128-bit constants.
1629 XMMRegister tmp = i.TempSimd128Register(0);
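// All-ones >> 1 yields 0x7FFFFFFFFFFFFFFF; ANDing clears the sign bit of
// the scalar double in lane 0.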
1630 __ pcmpeqd(tmp, tmp);
1631 __ psrlq(tmp, 1);
1632 CpuFeatureScope avx_scope(tasm(), AVX);
1633 __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1634 break;
1635 }
1636 case kAVXFloat64Neg: {
1637 // TODO(bmeurer): Use RIP relative 128-bit constants.
1638 XMMRegister tmp = i.TempSimd128Register(0);
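// All-ones << 63 yields 0x8000000000000000; XORing flips the sign bit of
// the scalar double in lane 0.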
1639 __ pcmpeqd(tmp, tmp);
1640 __ psllq(tmp, 63);
1641 CpuFeatureScope avx_scope(tasm(), AVX);
1642 __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1643 break;
1644 }
1645 case kSSEFloat64SilenceNaN:
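// Subtracting +0.0 leaves ordinary values (including their sign)
// unchanged, but forces any signaling NaN to a quiet one.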
1646 __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
1647 __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1648 break;
1649 case kIA32Movsxbl:
1650 ASSEMBLE_MOVX(movsx_b);
1651 break;
1652 case kIA32Movzxbl:
1653 ASSEMBLE_MOVX(movzx_b);
1654 break;
1655 case kIA32Movb: {
1656 size_t index = 0;
1657 Operand operand = i.MemoryOperand(&index);
1658 if (HasImmediateInput(instr, index)) {
1659 __ mov_b(operand, i.InputInt8(index));
1660 } else {
1661 __ mov_b(operand, i.InputRegister(index));
1662 }
1663 break;
1664 }
1665 case kIA32Movsxwl:
1666 ASSEMBLE_MOVX(movsx_w);
1667 break;
1668 case kIA32Movzxwl:
1669 ASSEMBLE_MOVX(movzx_w);
1670 break;
1671 case kIA32Movw: {
1672 size_t index = 0;
1673 Operand operand = i.MemoryOperand(&index);
1674 if (HasImmediateInput(instr, index)) {
1675 __ mov_w(operand, i.InputInt16(index));
1676 } else {
1677 __ mov_w(operand, i.InputRegister(index));
1678 }
1679 break;
1680 }
1681 case kIA32Movl:
1682 if (instr->HasOutput()) {
1683 __ mov(i.OutputRegister(), i.MemoryOperand());
1684 } else {
1685 size_t index = 0;
1686 Operand operand = i.MemoryOperand(&index);
1687 if (HasImmediateInput(instr, index)) {
1688 __ mov(operand, i.InputImmediate(index));
1689 } else {
1690 __ mov(operand, i.InputRegister(index));
1691 }
1692 }
1693 break;
1694 case kIA32Movsd:
1695 if (instr->HasOutput()) {
1696 __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1697 } else {
1698 size_t index = 0;
1699 Operand operand = i.MemoryOperand(&index);
1700 __ movsd(operand, i.InputDoubleRegister(index));
1701 }
1702 break;
1703 case kIA32Movss:
1704 if (instr->HasOutput()) {
1705 __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1706 } else {
1707 size_t index = 0;
1708 Operand operand = i.MemoryOperand(&index);
1709 __ movss(operand, i.InputDoubleRegister(index));
1710 }
1711 break;
1712 case kIA32Movdqu:
1713 if (instr->HasOutput()) {
1714 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1715 } else {
1716 size_t index = 0;
1717 Operand operand = i.MemoryOperand(&index);
1718 __ Movdqu(operand, i.InputSimd128Register(index));
1719 }
1720 break;
1721 case kIA32BitcastFI:
1722 if (instr->InputAt(0)->IsFPStackSlot()) {
1723 __ mov(i.OutputRegister(), i.InputOperand(0));
1724 } else {
1725 __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1726 }
1727 break;
1728 case kIA32BitcastIF:
1729 if (HasRegisterInput(instr, 0)) {
1730 __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
1731 } else {
1732 __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
1733 }
1734 break;
1735 case kIA32Lea: {
1736 AddressingMode mode = AddressingModeField::decode(instr->opcode());
1737 // Shorten "lea" to "add", "sub" or "shl" if the register allocation
1738 // and addressing mode just happen to work out. The "add"/"sub" forms
1739 // in these cases are faster based on measurements.
1740 if (mode == kMode_MI) {
1741 __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
1742 } else if (i.InputRegister(0) == i.OutputRegister()) {
1743 if (mode == kMode_MRI) {
1744 int32_t constant_summand = i.InputInt32(1);
1745 if (constant_summand > 0) {
1746 __ add(i.OutputRegister(), Immediate(constant_summand));
1747 } else if (constant_summand < 0) {
1748 __ sub(i.OutputRegister(),
1749 Immediate(base::NegateWithWraparound(constant_summand)));
1750 }
1751 } else if (mode == kMode_MR1) {
1752 if (i.InputRegister(1) == i.OutputRegister()) {
1753 __ shl(i.OutputRegister(), 1);
1754 } else {
1755 __ add(i.OutputRegister(), i.InputRegister(1));
1756 }
1757 } else if (mode == kMode_M2) {
1758 __ shl(i.OutputRegister(), 1);
1759 } else if (mode == kMode_M4) {
1760 __ shl(i.OutputRegister(), 2);
1761 } else if (mode == kMode_M8) {
1762 __ shl(i.OutputRegister(), 3);
1763 } else {
1764 __ lea(i.OutputRegister(), i.MemoryOperand());
1765 }
1766 } else if (mode == kMode_MR1 &&
1767 i.InputRegister(1) == i.OutputRegister()) {
1768 __ add(i.OutputRegister(), i.InputRegister(0));
1769 } else {
1770 __ lea(i.OutputRegister(), i.MemoryOperand());
1771 }
1772 break;
1773 }
1774 case kIA32PushFloat32:
1775 if (instr->InputAt(0)->IsFPRegister()) {
1776 __ AllocateStackSpace(kFloatSize);
1777 __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
1778 frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
1779 } else if (HasImmediateInput(instr, 0)) {
1780 __ Move(kScratchDoubleReg, i.InputFloat32(0));
1781 __ AllocateStackSpace(kFloatSize);
1782 __ movss(Operand(esp, 0), kScratchDoubleReg);
1783 frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
1784 } else {
1785 __ movss(kScratchDoubleReg, i.InputOperand(0));
1786 __ AllocateStackSpace(kFloatSize);
1787 __ movss(Operand(esp, 0), kScratchDoubleReg);
1788 frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
1789 }
1790 break;
1791 case kIA32PushFloat64:
1792 if (instr->InputAt(0)->IsFPRegister()) {
1793 __ AllocateStackSpace(kDoubleSize);
1794 __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1795 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
1796 } else if (HasImmediateInput(instr, 0)) {
1797 __ Move(kScratchDoubleReg, i.InputDouble(0));
1798 __ AllocateStackSpace(kDoubleSize);
1799 __ movsd(Operand(esp, 0), kScratchDoubleReg);
1800 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
1801 } else {
1802 __ movsd(kScratchDoubleReg, i.InputOperand(0));
1803 __ AllocateStackSpace(kDoubleSize);
1804 __ movsd(Operand(esp, 0), kScratchDoubleReg);
1805 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
1806 }
1807 break;
1808 case kIA32PushSimd128:
1809 if (instr->InputAt(0)->IsFPRegister()) {
1810 __ AllocateStackSpace(kSimd128Size);
1811 __ movups(Operand(esp, 0), i.InputSimd128Register(0));
1812 } else {
1813 __ movups(kScratchDoubleReg, i.InputOperand(0));
1814 __ AllocateStackSpace(kSimd128Size);
1815 __ movups(Operand(esp, 0), kScratchDoubleReg);
1816 }
1817 frame_access_state()->IncreaseSPDelta(kSimd128Size / kSystemPointerSize);
1818 break;
1819 case kIA32Push:
1820 if (HasAddressingMode(instr)) {
1821 size_t index = 0;
1822 Operand operand = i.MemoryOperand(&index);
1823 __ push(operand);
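// kFloatSize == kSystemPointerSize on ia32, so this counts a single slot.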
1824 frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
1825 } else if (instr->InputAt(0)->IsFPRegister()) {
1826 __ AllocateStackSpace(kFloatSize);
1827 __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1828 frame_access_state()->IncreaseSPDelta(kFloatSize / kSystemPointerSize);
1829 } else if (HasImmediateInput(instr, 0)) {
1830 __ push(i.InputImmediate(0));
1831 frame_access_state()->IncreaseSPDelta(1);
1832 } else {
1833 __ push(i.InputOperand(0));
1834 frame_access_state()->IncreaseSPDelta(1);
1835 }
1836 break;
1837 case kIA32Poke: {
1838 int slot = MiscField::decode(instr->opcode());
1839 if (HasImmediateInput(instr, 0)) {
1840 __ mov(Operand(esp, slot * kSystemPointerSize), i.InputImmediate(0));
1841 } else {
1842 __ mov(Operand(esp, slot * kSystemPointerSize), i.InputRegister(0));
1843 }
1844 break;
1845 }
1846 case kIA32Peek: {
1847 int reverse_slot = i.InputInt32(0);
1848 int offset =
1849 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1850 if (instr->OutputAt(0)->IsFPRegister()) {
1851 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1852 if (op->representation() == MachineRepresentation::kFloat64) {
1853 __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
1854 } else if (op->representation() == MachineRepresentation::kFloat32) {
1855 __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
1856 } else {
1857 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
1858 __ movdqu(i.OutputSimd128Register(), Operand(ebp, offset));
1859 }
1860 } else {
1861 __ mov(i.OutputRegister(), Operand(ebp, offset));
1862 }
1863 break;
1864 }
1865 case kSSEF64x2Splat: {
1866 DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1867 XMMRegister dst = i.OutputSimd128Register();
1868 __ shufpd(dst, dst, 0x0);
1869 break;
1870 }
1871 case kAVXF64x2Splat: {
1872 CpuFeatureScope avx_scope(tasm(), AVX);
1873 XMMRegister src = i.InputDoubleRegister(0);
1874 __ vshufpd(i.OutputSimd128Register(), src, src, 0x0);
1875 break;
1876 }
1877 case kSSEF64x2ExtractLane: {
1878 DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1879 XMMRegister dst = i.OutputDoubleRegister();
1880 int8_t lane = i.InputInt8(1);
1881 if (lane != 0) {
1882 DCHECK_LT(lane, 2);
1883 __ shufpd(dst, dst, lane);
1884 }
1885 break;
1886 }
1887 case kAVXF64x2ExtractLane: {
1888 CpuFeatureScope avx_scope(tasm(), AVX);
1889 XMMRegister dst = i.OutputDoubleRegister();
1890 XMMRegister src = i.InputSimd128Register(0);
1891 int8_t lane = i.InputInt8(1);
1892 if (lane == 0) {
1893 if (dst != src) __ vmovapd(dst, src);
1894 } else {
1895 DCHECK_LT(lane, 2);
1896 __ vshufpd(dst, src, src, lane);
1897 }
1898 break;
1899 }
1900 case kSSEF64x2ReplaceLane: {
1901 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
1902 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1903 XMMRegister dst = i.OutputSimd128Register();
1904 int8_t lane = i.InputInt8(1);
1905 DoubleRegister rep = i.InputDoubleRegister(2);
1906
1907 // insertps takes a mask which contains (high to low):
1908 // - 2 bits specifying the source float element to copy
1909 // - 2 bits specifying the destination float element to write to
1910 // - 4 bits specifying which elements of the destination to zero
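// There is no 64-bit insert, so the double is moved as its two 32-bit
// halves: lane 0 occupies float elements 0-1, lane 1 elements 2-3.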
1911 DCHECK_LT(lane, 2);
1912 if (lane == 0) {
1913 __ insertps(dst, rep, 0b00000000);
1914 __ insertps(dst, rep, 0b01010000);
1915 } else {
1916 __ insertps(dst, rep, 0b00100000);
1917 __ insertps(dst, rep, 0b01110000);
1918 }
1919 break;
1920 }
1921 case kAVXF64x2ReplaceLane: {
1922 CpuFeatureScope avx_scope(tasm(), AVX);
1923 XMMRegister dst = i.OutputSimd128Register();
1924 XMMRegister src = i.InputSimd128Register(0);
1925 int8_t lane = i.InputInt8(1);
1926 DoubleRegister rep = i.InputDoubleRegister(2);
1927 DCHECK_NE(dst, rep);
1928
1929 DCHECK_LT(lane, 2);
1930 if (lane == 0) {
1931 __ vinsertps(dst, src, rep, 0b00000000);
1932 __ vinsertps(dst, dst, rep, 0b01010000);
1933 } else {
1934 __ vinsertps(dst, src, rep, 0b00100000);
1935 __ vinsertps(dst, dst, rep, 0b01110000);
1936 }
1937 break;
1938 }
1939 case kIA32F64x2Sqrt: {
1940 __ Sqrtpd(i.OutputSimd128Register(), i.InputOperand(0));
1941 break;
1942 }
1943 case kIA32F64x2Add: {
1944 __ Addpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1945 i.InputOperand(1));
1946 break;
1947 }
1948 case kIA32F64x2Sub: {
1949 __ Subpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1950 i.InputOperand(1));
1951 break;
1952 }
1953 case kIA32F64x2Mul: {
1954 __ Mulpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1955 i.InputOperand(1));
1956 break;
1957 }
1958 case kIA32F64x2Div: {
1959 __ Divpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1960 i.InputOperand(1));
1961 break;
1962 }
1963 case kIA32F64x2Min: {
1964 Operand src1 = i.InputOperand(1);
1965 XMMRegister dst = i.OutputSimd128Register(),
1966 src = i.InputSimd128Register(0),
1967 tmp = i.TempSimd128Register(0);
1968 // The minpd instruction doesn't propagate NaNs and +0's in its first
1969 // operand. Perform minpd in both orders, merge the results, and adjust.
1970 __ Movupd(tmp, src1);
1971 __ Minpd(tmp, tmp, src);
1972 __ Minpd(dst, src, src1);
1973 // Propagate -0's and NaNs, which may be non-canonical.
1974 __ Orpd(tmp, dst);
1975 // Canonicalize NaNs by quieting and clearing the payload.
1976 __ Cmpunordpd(dst, dst, tmp);
1977 __ Orpd(tmp, dst);
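// dst is all-ones in NaN lanes; >> 13 then andnpd keeps only sign,
// exponent and quiet bit there (0xFFF8000000000000), i.e. a quiet NaN.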
1978 __ Psrlq(dst, 13);
1979 __ Andnpd(dst, tmp);
1980 break;
1981 }
1982 case kIA32F64x2Max: {
1983 Operand src1 = i.InputOperand(1);
1984 XMMRegister dst = i.OutputSimd128Register(),
1985 src = i.InputSimd128Register(0),
1986 tmp = i.TempSimd128Register(0);
1987 // The maxpd instruction doesn't propagate NaNs and +0's in its first
1988 // operand. Perform maxpd in both orders, merge the results, and adjust.
1989 __ Movupd(tmp, src1);
1990 __ Maxpd(tmp, tmp, src);
1991 __ Maxpd(dst, src, src1);
1992 // Find discrepancies.
1993 __ Xorpd(dst, tmp);
1994 // Propagate NaNs, which may be non-canonical.
1995 __ Orpd(tmp, dst);
1996 // Propagate sign discrepancy and (subtle) quiet NaNs.
1997 __ Subpd(tmp, tmp, dst);
1998 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
1999 __ Cmpunordpd(dst, dst, tmp);
2000 __ Psrlq(dst, 13);
2001 __ Andnpd(dst, tmp);
2002 break;
2003 }
2004 case kIA32F64x2Eq: {
2005 __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2006 i.InputOperand(1));
2007 break;
2008 }
2009 case kIA32F64x2Ne: {
2010 __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2011 i.InputOperand(1));
2012 break;
2013 }
2014 case kIA32F64x2Lt: {
2015 __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2016 i.InputOperand(1));
2017 break;
2018 }
2019 case kIA32F64x2Le: {
2020 __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2021 i.InputOperand(1));
2022 break;
2023 }
2024 case kIA32F64x2Pmin: {
2025 XMMRegister dst = i.OutputSimd128Register();
2026 DCHECK_EQ(dst, i.InputSimd128Register(0));
2027 __ Minpd(dst, dst, i.InputSimd128Register(1));
2028 break;
2029 }
2030 case kIA32F64x2Pmax: {
2031 XMMRegister dst = i.OutputSimd128Register();
2032 DCHECK_EQ(dst, i.InputSimd128Register(0));
2033 __ Maxpd(dst, dst, i.InputSimd128Register(1));
2034 break;
2035 }
2036 case kIA32F64x2Round: {
2037 RoundingMode const mode =
2038 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2039 __ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
2040 break;
2041 }
2042 case kIA32I64x2SplatI32Pair: {
2043 XMMRegister dst = i.OutputSimd128Register();
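// Insert the two halves into dwords 0 and 1, then pshufd with 0x44
// (dwords 0,1,0,1) copies the low qword into the high qword.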
2044 __ Pinsrd(dst, i.InputRegister(0), 0);
2045 __ Pinsrd(dst, i.InputOperand(1), 1);
2046 __ Pshufd(dst, dst, 0x44);
2047 break;
2048 }
2049 case kIA32I64x2ReplaceLaneI32Pair: {
2050 int8_t lane = i.InputInt8(1);
2051 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), lane * 2);
2052 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(3), lane * 2 + 1);
2053 break;
2054 }
2055 case kIA32I64x2Neg: {
2056 XMMRegister dst = i.OutputSimd128Register();
2057 Operand src = i.InputOperand(0);
2058 __ Pxor(dst, dst);
2059 __ Psubq(dst, src);
2060 break;
2061 }
2062 case kIA32I64x2Shl: {
2063 ASSEMBLE_SIMD_SHIFT(Psllq, 6);
2064 break;
2065 }
2066 case kIA32I64x2ShrS: {
2067 XMMRegister dst = i.OutputSimd128Register();
2068 XMMRegister src = i.InputSimd128Register(0);
2069 XMMRegister tmp = i.TempSimd128Register(0);
2070 XMMRegister tmp2 = i.TempSimd128Register(1);
2071 Operand shift = i.InputOperand(1);
2072
2073 // Take shift value modulo 64.
2074 __ and_(shift, Immediate(63));
2075 __ Movd(tmp, shift);
2076
2077 // Set up a mask with only the sign bit (0x8000000000000000) set in
2077 // each qword.
2078 __ Pcmpeqb(tmp2, tmp2);
2079 __ Psllq(tmp2, tmp2, 63);
2080
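// Arithmetic shift via ((x >>u n) ^ m) - m with m = 0x80..0 >>u n: the
// xor/sub pair sign-extends the bits shifted in at the top.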
2081 __ Psrlq(tmp2, tmp2, tmp);
2082 __ Psrlq(dst, src, tmp);
2083 __ Pxor(dst, tmp2);
2084 __ Psubq(dst, tmp2);
2085 break;
2086 }
2087 case kIA32I64x2Add: {
2088 __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2089 i.InputOperand(1));
2090 break;
2091 }
2092 case kIA32I64x2Sub: {
2093 __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(0),
2094 i.InputOperand(1));
2095 break;
2096 }
2097 case kIA32I64x2Mul: {
2098 XMMRegister dst = i.OutputSimd128Register();
2099 XMMRegister left = i.InputSimd128Register(0);
2100 XMMRegister right = i.InputSimd128Register(1);
2101 XMMRegister tmp1 = i.TempSimd128Register(0);
2102 XMMRegister tmp2 = i.TempSimd128Register(1);
2103
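// Schoolbook: with a = a_hi * 2^32 + a_lo (b likewise), the low 64 bits
// of a * b are ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo, and
// pmuludq provides the 32x32->64 partial products.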
2104 __ Movaps(tmp1, left);
2105 __ Movaps(tmp2, right);
2106
2107 // Multiply high dword of each qword of left with right.
2108 __ Psrlq(tmp1, 32);
2109 __ Pmuludq(tmp1, tmp1, right);
2110
2111 // Multiply high dword of each qword of right with left.
2112 __ Psrlq(tmp2, 32);
2113 __ Pmuludq(tmp2, tmp2, left);
2114
2115 __ Paddq(tmp2, tmp2, tmp1);
2116 __ Psllq(tmp2, tmp2, 32);
2117
2118 __ Pmuludq(dst, left, right);
2119 __ Paddq(dst, dst, tmp2);
2120 break;
2121 }
2122 case kIA32I64x2ShrU: {
2123 ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
2124 break;
2125 }
2126 case kSSEF32x4Splat: {
2127 DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2128 XMMRegister dst = i.OutputSimd128Register();
2129 __ shufps(dst, dst, 0x0);
2130 break;
2131 }
2132 case kAVXF32x4Splat: {
2133 CpuFeatureScope avx_scope(tasm(), AVX);
2134 XMMRegister src = i.InputFloatRegister(0);
2135 __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
2136 break;
2137 }
2138 case kSSEF32x4ExtractLane: {
2139 DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
2140 XMMRegister dst = i.OutputFloatRegister();
2141 int8_t lane = i.InputInt8(1);
2142 if (lane != 0) {
2143 DCHECK_LT(lane, 4);
2144 __ shufps(dst, dst, lane);
2145 }
2146 break;
2147 }
2148 case kAVXF32x4ExtractLane: {
2149 CpuFeatureScope avx_scope(tasm(), AVX);
2150 XMMRegister dst = i.OutputFloatRegister();
2151 XMMRegister src = i.InputSimd128Register(0);
2152 int8_t lane = i.InputInt8(1);
2153 if (lane == 0) {
2154 if (dst != src) __ vmovaps(dst, src);
2155 } else {
2156 DCHECK_LT(lane, 4);
2157 __ vshufps(dst, src, src, lane);
2158 }
2159 break;
2160 }
2161 case kSSEF32x4ReplaceLane: {
2162 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2163 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2164 __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
2165 i.InputInt8(1) << 4);
2166 break;
2167 }
2168 case kAVXF32x4ReplaceLane: {
2169 CpuFeatureScope avx_scope(tasm(), AVX);
2170 __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2171 i.InputOperand(2), i.InputInt8(1) << 4);
2172 break;
2173 }
2174 case kIA32F32x4SConvertI32x4: {
2175 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
2176 break;
2177 }
2178 case kSSEF32x4UConvertI32x4: {
2179 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2180 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2181 XMMRegister dst = i.OutputSimd128Register();
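// Convert in halves: the low 16 bits of each lane convert exactly; the
// remainder is even, so halving it fits signed range and convert+double
// is still exact. The final add rounds once.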
2182 __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2183 __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2184 __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2185 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2186 __ psrld(dst, 1); // divide by 2 to get in unsigned range
2187 __ cvtdq2ps(dst, dst); // convert hi exactly
2188 __ addps(dst, dst); // double hi, exactly
2189 __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2190 break;
2191 }
2192 case kAVXF32x4UConvertI32x4: {
2193 CpuFeatureScope avx_scope(tasm(), AVX);
2194 XMMRegister dst = i.OutputSimd128Register();
2195 XMMRegister src = i.InputSimd128Register(0);
2196 __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
2197 kScratchDoubleReg); // zeros
2198 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
2199 0x55); // get lo 16 bits
2200 __ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits
2201 __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2202 __ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range
2203 __ vcvtdq2ps(dst, dst); // convert hi exactly
2204 __ vaddps(dst, dst, dst); // double hi, exactly
2205 __ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round.
2206 break;
2207 }
2208 case kSSEF32x4Abs: {
2209 XMMRegister dst = i.OutputSimd128Register();
2210 DCHECK_EQ(i.InputSimd128Register(0), dst);
2211 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2212 __ psrld(kScratchDoubleReg, 1);
2213 __ andps(dst, kScratchDoubleReg);
2214 break;
2215 }
2216 case kAVXF32x4Abs: {
2217 CpuFeatureScope avx_scope(tasm(), AVX);
2218 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2219 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
2220 __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
2221 i.InputOperand(0));
2222 break;
2223 }
2224 case kSSEF32x4Neg: {
2225 XMMRegister dst = i.OutputSimd128Register();
2226 DCHECK_EQ(dst, i.InputSimd128Register(0));
2227 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2228 __ pslld(kScratchDoubleReg, 31);
2229 __ xorps(dst, kScratchDoubleReg);
2230 break;
2231 }
2232 case kAVXF32x4Neg: {
2233 CpuFeatureScope avx_scope(tasm(), AVX);
2234 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2235 __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
2236 __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
2237 i.InputOperand(0));
2238 break;
2239 }
2240 case kSSEF32x4Sqrt: {
2241 __ sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2242 break;
2243 }
2244 case kAVXF32x4Sqrt: {
2245 CpuFeatureScope avx_scope(tasm(), AVX);
2246 __ vsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
2247 break;
2248 }
2249 case kIA32F32x4RecipApprox: {
2250 __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
2251 break;
2252 }
2253 case kIA32F32x4RecipSqrtApprox: {
2254 __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
2255 break;
2256 }
2257 case kSSEF32x4Add: {
2258 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2259 __ addps(i.OutputSimd128Register(), i.InputOperand(1));
2260 break;
2261 }
2262 case kAVXF32x4Add: {
2263 CpuFeatureScope avx_scope(tasm(), AVX);
2264 __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2265 i.InputOperand(1));
2266 break;
2267 }
2268 case kSSEF32x4AddHoriz: {
2269 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2270 CpuFeatureScope sse_scope(tasm(), SSE3);
2271 __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
2272 break;
2273 }
2274 case kAVXF32x4AddHoriz: {
2275 CpuFeatureScope avx_scope(tasm(), AVX);
2276 __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2277 i.InputOperand(1));
2278 break;
2279 }
2280 case kSSEF32x4Sub: {
2281 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2282 __ subps(i.OutputSimd128Register(), i.InputOperand(1));
2283 break;
2284 }
2285 case kAVXF32x4Sub: {
2286 CpuFeatureScope avx_scope(tasm(), AVX);
2287 __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2288 i.InputOperand(1));
2289 break;
2290 }
2291 case kSSEF32x4Mul: {
2292 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2293 __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
2294 break;
2295 }
2296 case kAVXF32x4Mul: {
2297 CpuFeatureScope avx_scope(tasm(), AVX);
2298 __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2299 i.InputOperand(1));
2300 break;
2301 }
2302 case kSSEF32x4Div: {
2303 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2304 __ divps(i.OutputSimd128Register(), i.InputOperand(1));
2305 break;
2306 }
2307 case kAVXF32x4Div: {
2308 CpuFeatureScope avx_scope(tasm(), AVX);
2309 __ vdivps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2310 i.InputOperand(1));
2311 break;
2312 }
2313 case kSSEF32x4Min: {
2314 XMMRegister src1 = i.InputSimd128Register(1),
2315 dst = i.OutputSimd128Register();
2316 DCHECK_EQ(dst, i.InputSimd128Register(0));
2317 // The minps instruction doesn't propagate NaNs and +0's in its first
2318 // operand. Perform minps in both orders, merge the results, and adjust.
2319 __ movaps(kScratchDoubleReg, src1);
2320 __ minps(kScratchDoubleReg, dst);
2321 __ minps(dst, src1);
2322 // Propagate -0's and NaNs, which may be non-canonical.
2323 __ orps(kScratchDoubleReg, dst);
2324 // Canonicalize NaNs by quieting and clearing the payload.
2325 __ cmpps(dst, kScratchDoubleReg, 3);
2326 __ orps(kScratchDoubleReg, dst);
2327 __ psrld(dst, 10);
2328 __ andnps(dst, kScratchDoubleReg);
2329 break;
2330 }
2331 case kAVXF32x4Min: {
2332 CpuFeatureScope avx_scope(tasm(), AVX);
2333 XMMRegister dst = i.OutputSimd128Register();
2334 XMMRegister src0 = i.InputSimd128Register(0);
2335 Operand src1 = i.InputOperand(1);
2336 // See comment above for correction of minps.
2337 __ movups(kScratchDoubleReg, src1);
2338 __ vminps(kScratchDoubleReg, kScratchDoubleReg, src0);
2339 __ vminps(dst, src0, src1);
2340 __ vorps(dst, dst, kScratchDoubleReg);
2341 __ vcmpneqps(kScratchDoubleReg, dst, dst);
2342 __ vorps(dst, dst, kScratchDoubleReg);
2343 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 10);
2344 __ vandnps(dst, kScratchDoubleReg, dst);
2345 break;
2346 }
2347 case kSSEF32x4Max: {
2348 XMMRegister src1 = i.InputSimd128Register(1),
2349 dst = i.OutputSimd128Register();
2350 DCHECK_EQ(dst, i.InputSimd128Register(0));
2351 // The maxps instruction doesn't propagate NaNs and +0's in its first
2352 // operand. Perform maxps in both orders, merge the results, and adjust.
2353 __ movaps(kScratchDoubleReg, src1);
2354 __ maxps(kScratchDoubleReg, dst);
2355 __ maxps(dst, src1);
2356 // Find discrepancies.
2357 __ xorps(dst, kScratchDoubleReg);
2358 // Propagate NaNs, which may be non-canonical.
2359 __ orps(kScratchDoubleReg, dst);
2360 // Propagate sign discrepancy and (subtle) quiet NaNs.
2361 __ subps(kScratchDoubleReg, dst);
2362 // Canonicalize NaNs by clearing the payload.
2363 __ cmpps(dst, kScratchDoubleReg, 3);
2364 __ psrld(dst, 10);
2365 __ andnps(dst, kScratchDoubleReg);
2366 break;
2367 }
2368 case kAVXF32x4Max: {
2369 CpuFeatureScope avx_scope(tasm(), AVX);
2370 XMMRegister dst = i.OutputSimd128Register();
2371 XMMRegister src0 = i.InputSimd128Register(0);
2372 Operand src1 = i.InputOperand(1);
2373 // See comment above for correction of maxps.
2374 __ vmovups(kScratchDoubleReg, src1);
2375 __ vmaxps(kScratchDoubleReg, kScratchDoubleReg, src0);
2376 __ vmaxps(dst, src0, src1);
2377 __ vxorps(dst, dst, kScratchDoubleReg);
2378 __ vorps(kScratchDoubleReg, kScratchDoubleReg, dst);
2379 __ vsubps(kScratchDoubleReg, kScratchDoubleReg, dst);
2380 __ vcmpneqps(dst, kScratchDoubleReg, kScratchDoubleReg);
2381 __ vpsrld(dst, dst, 10);
2382 __ vandnps(dst, dst, kScratchDoubleReg);
2383 break;
2384 }
2385 case kSSEF32x4Eq: {
2386 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2387 __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
2388 break;
2389 }
2390 case kAVXF32x4Eq: {
2391 CpuFeatureScope avx_scope(tasm(), AVX);
2392 __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2393 i.InputOperand(1));
2394 break;
2395 }
2396 case kSSEF32x4Ne: {
2397 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2398 __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
2399 break;
2400 }
2401 case kAVXF32x4Ne: {
2402 CpuFeatureScope avx_scope(tasm(), AVX);
2403 __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2404 i.InputOperand(1));
2405 break;
2406 }
2407 case kSSEF32x4Lt: {
2408 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2409 __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
2410 break;
2411 }
2412 case kAVXF32x4Lt: {
2413 CpuFeatureScope avx_scope(tasm(), AVX);
2414 __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2415 i.InputOperand(1));
2416 break;
2417 }
2418 case kSSEF32x4Le: {
2419 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2420 __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
2421 break;
2422 }
2423 case kAVXF32x4Le: {
2424 CpuFeatureScope avx_scope(tasm(), AVX);
2425 __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
2426 i.InputOperand(1));
2427 break;
2428 }
2429 case kIA32F32x4Pmin: {
2430 XMMRegister dst = i.OutputSimd128Register();
2431 DCHECK_EQ(dst, i.InputSimd128Register(0));
2432 __ Minps(dst, dst, i.InputSimd128Register(1));
2433 break;
2434 }
2435 case kIA32F32x4Pmax: {
2436 XMMRegister dst = i.OutputSimd128Register();
2437 DCHECK_EQ(dst, i.InputSimd128Register(0));
2438 __ Maxps(dst, dst, i.InputSimd128Register(1));
2439 break;
2440 }
2441 case kIA32F32x4Round: {
2442 RoundingMode const mode =
2443 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2444 __ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
2445 break;
2446 }
2447 case kIA32I32x4Splat: {
2448 XMMRegister dst = i.OutputSimd128Register();
2449 __ Movd(dst, i.InputOperand(0));
2450 __ Pshufd(dst, dst, 0x0);
2451 break;
2452 }
2453 case kIA32I32x4ExtractLane: {
2454 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2455 break;
2456 }
2457 case kSSEI32x4ReplaceLane: {
2458 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2459 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2460 __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2461 break;
2462 }
2463 case kAVXI32x4ReplaceLane: {
2464 CpuFeatureScope avx_scope(tasm(), AVX);
2465 __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2466 i.InputOperand(2), i.InputInt8(1));
2467 break;
2468 }
2469 case kSSEI32x4SConvertF32x4: {
2470 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2471 XMMRegister dst = i.OutputSimd128Register();
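// cvttps2dq returns 0x80000000 (INT_MIN) for NaN and out-of-range inputs;
// the mask work below zeroes NaN lanes and turns positive overflow into
// 0x7FFFFFFF.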
2472 // NaN->0
2473 __ movaps(kScratchDoubleReg, dst);
2474 __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2475 __ pand(dst, kScratchDoubleReg);
2476 // Set top bit if >= 0 (but not -0.0!)
2477 __ pxor(kScratchDoubleReg, dst);
2478 // Convert
2479 __ cvttps2dq(dst, dst);
2480 // Set top bit if the lane was >= 0 but is now < 0 (positive overflow)
2481 __ pand(kScratchDoubleReg, dst);
2482 __ psrad(kScratchDoubleReg, 31);
2483 // Set positive overflow lanes to 0x7FFFFFFF
2484 __ pxor(dst, kScratchDoubleReg);
2485 break;
2486 }
2487 case kAVXI32x4SConvertF32x4: {
2488 CpuFeatureScope avx_scope(tasm(), AVX);
2489 XMMRegister dst = i.OutputSimd128Register();
2490 XMMRegister src = i.InputSimd128Register(0);
2491 // NaN->0
2492 __ vcmpeqps(kScratchDoubleReg, src, src);
2493 __ vpand(dst, src, kScratchDoubleReg);
2494 // Set top bit if >= 0 (but not -0.0!)
2495 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
2496 // Convert
2497 __ vcvttps2dq(dst, dst);
2498 // Set top bit if the lane was >= 0 but is now < 0 (positive overflow)
2499 __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
2500 __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
2501 // Set positive overflow lanes to 0x7FFFFFFF
2502 __ vpxor(dst, dst, kScratchDoubleReg);
2503 break;
2504 }
2505 case kIA32I32x4SConvertI16x8Low: {
2506 __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
2507 break;
2508 }
2509 case kIA32I32x4SConvertI16x8High: {
2510 XMMRegister dst = i.OutputSimd128Register();
2511 __ Palignr(dst, i.InputOperand(0), 8);
2512 __ Pmovsxwd(dst, dst);
2513 break;
2514 }
2515 case kIA32I32x4Neg: {
2516 XMMRegister dst = i.OutputSimd128Register();
2517 Operand src = i.InputOperand(0);
2518 if (src.is_reg(dst)) {
2519 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2520 __ Psignd(dst, kScratchDoubleReg);
2521 } else {
2522 __ Pxor(dst, dst);
2523 __ Psubd(dst, src);
2524 }
2525 break;
2526 }
2527 case kIA32I32x4Shl: {
2528 ASSEMBLE_SIMD_SHIFT(Pslld, 5);
2529 break;
2530 }
2531 case kIA32I32x4ShrS: {
2532 ASSEMBLE_SIMD_SHIFT(Psrad, 5);
2533 break;
2534 }
2535 case kSSEI32x4Add: {
2536 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2537 __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
2538 break;
2539 }
2540 case kAVXI32x4Add: {
2541 CpuFeatureScope avx_scope(tasm(), AVX);
2542 __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2543 i.InputOperand(1));
2544 break;
2545 }
2546 case kSSEI32x4AddHoriz: {
2547 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2548 CpuFeatureScope sse_scope(tasm(), SSSE3);
2549 __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
2550 break;
2551 }
2552 case kAVXI32x4AddHoriz: {
2553 CpuFeatureScope avx_scope(tasm(), AVX);
2554 __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2555 i.InputOperand(1));
2556 break;
2557 }
2558 case kSSEI32x4Sub: {
2559 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2560 __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
2561 break;
2562 }
2563 case kAVXI32x4Sub: {
2564 CpuFeatureScope avx_scope(tasm(), AVX);
2565 __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2566 i.InputOperand(1));
2567 break;
2568 }
2569 case kSSEI32x4Mul: {
2570 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2571 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2572 __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
2573 break;
2574 }
2575 case kAVXI32x4Mul: {
2576 CpuFeatureScope avx_scope(tasm(), AVX);
2577 __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
2578 i.InputOperand(1));
2579 break;
2580 }
2581 case kSSEI32x4MinS: {
2582 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2583 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2584 __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
2585 break;
2586 }
2587 case kAVXI32x4MinS: {
2588 CpuFeatureScope avx_scope(tasm(), AVX);
2589 __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2590 i.InputOperand(1));
2591 break;
2592 }
2593 case kSSEI32x4MaxS: {
2594 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2595 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2596 __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
2597 break;
2598 }
2599 case kAVXI32x4MaxS: {
2600 CpuFeatureScope avx_scope(tasm(), AVX);
2601 __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2602 i.InputOperand(1));
2603 break;
2604 }
2605 case kSSEI32x4Eq: {
2606 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2607 __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
2608 break;
2609 }
2610 case kAVXI32x4Eq: {
2611 CpuFeatureScope avx_scope(tasm(), AVX);
2612 __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2613 i.InputOperand(1));
2614 break;
2615 }
2616 case kSSEI32x4Ne: {
2617 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2618 __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
2619 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2620 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2621 break;
2622 }
2623 case kAVXI32x4Ne: {
2624 CpuFeatureScope avx_scope(tasm(), AVX);
2625 __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2626 i.InputOperand(1));
2627 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2628 __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2629 kScratchDoubleReg);
2630 break;
2631 }
2632 case kSSEI32x4GtS: {
2633 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2634 __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
2635 break;
2636 }
2637 case kAVXI32x4GtS: {
2638 CpuFeatureScope avx_scope(tasm(), AVX);
2639 __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2640 i.InputOperand(1));
2641 break;
2642 }
2643 case kSSEI32x4GeS: {
2644 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2645 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2646 XMMRegister dst = i.OutputSimd128Register();
2647 Operand src = i.InputOperand(1);
2648 __ pminsd(dst, src);
2649 __ pcmpeqd(dst, src);
2650 break;
2651 }
2652 case kAVXI32x4GeS: {
2653 CpuFeatureScope avx_scope(tasm(), AVX);
2654 XMMRegister src1 = i.InputSimd128Register(0);
2655 Operand src2 = i.InputOperand(1);
2656 __ vpminsd(kScratchDoubleReg, src1, src2);
2657 __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2658 break;
2659 }
2660 case kSSEI32x4UConvertF32x4: {
2661 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2662 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2663 XMMRegister dst = i.OutputSimd128Register();
2664 XMMRegister tmp = i.TempSimd128Register(0);
2665 // NaN->0, negative->0
2666 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2667 __ maxps(dst, kScratchDoubleReg);
2668 // scratch: float representation of max_signed
2669 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2670 __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2671 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2672 // tmp: convert (src-max_signed).
2673 // Positive overflow lanes -> 0x7FFFFFFF
2674 // Negative lanes -> 0
2675 __ movaps(tmp, dst);
2676 __ subps(tmp, kScratchDoubleReg);
2677 __ cmpleps(kScratchDoubleReg, tmp);
2678 __ cvttps2dq(tmp, tmp);
2679 __ pxor(tmp, kScratchDoubleReg);
2680 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2681 __ pmaxsd(tmp, kScratchDoubleReg);
2682 // Convert. Overflow lanes above max_signed will be 0x80000000.
2683 __ cvttps2dq(dst, dst);
2684 // Add (src-max_signed) for overflow lanes.
2685 __ paddd(dst, tmp);
2686 break;
2687 }
2688 case kAVXI32x4UConvertF32x4: {
2689 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2690 CpuFeatureScope avx_scope(tasm(), AVX);
2691 XMMRegister dst = i.OutputSimd128Register();
2692 XMMRegister tmp = i.TempSimd128Register(0);
2693 // NaN->0, negative->0
2694 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2695 __ vmaxps(dst, dst, kScratchDoubleReg);
2696 // scratch: float representation of max_signed
2697 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2698 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff
2699 __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2700 // tmp: convert (src-max_signed).
2701 // Positive overflow lanes -> 0x7FFFFFFF
2702 // Negative lanes -> 0
2703 __ vsubps(tmp, dst, kScratchDoubleReg);
2704 __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
2705 __ vcvttps2dq(tmp, tmp);
2706 __ vpxor(tmp, tmp, kScratchDoubleReg);
2707 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2708 __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
2709 // Convert. Overflow lanes above max_signed will be 0x80000000.
2710 __ vcvttps2dq(dst, dst);
2711 // Add (src-max_signed) for overflow lanes.
2712 __ vpaddd(dst, dst, tmp);
2713 break;
2714 }
2715 case kIA32I32x4UConvertI16x8Low: {
2716 __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
2717 break;
2718 }
2719 case kIA32I32x4UConvertI16x8High: {
2720 XMMRegister dst = i.OutputSimd128Register();
2721 __ Palignr(dst, i.InputOperand(0), 8);
2722 __ Pmovzxwd(dst, dst);
2723 break;
2724 }
2725 case kIA32I32x4ShrU: {
2726 ASSEMBLE_SIMD_SHIFT(Psrld, 5);
2727 break;
2728 }
2729 case kSSEI32x4MinU: {
2730 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2731 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2732 __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
2733 break;
2734 }
2735 case kAVXI32x4MinU: {
2736 CpuFeatureScope avx_scope(tasm(), AVX);
2737 __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2738 i.InputOperand(1));
2739 break;
2740 }
2741 case kSSEI32x4MaxU: {
2742 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2743 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2744 __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
2745 break;
2746 }
2747 case kAVXI32x4MaxU: {
2748 CpuFeatureScope avx_scope(tasm(), AVX);
2749 __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
2750 i.InputOperand(1));
2751 break;
2752 }
2753 case kSSEI32x4GtU: {
2754 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2755 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756 XMMRegister dst = i.OutputSimd128Register();
2757 Operand src = i.InputOperand(1);
2758 __ pmaxud(dst, src);
2759 __ pcmpeqd(dst, src);
2760 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2761 __ pxor(dst, kScratchDoubleReg);
2762 break;
2763 }
2764 case kAVXI32x4GtU: {
2765 CpuFeatureScope avx_scope(tasm(), AVX);
2766 XMMRegister dst = i.OutputSimd128Register();
2767 XMMRegister src1 = i.InputSimd128Register(0);
2768 Operand src2 = i.InputOperand(1);
2769 __ vpmaxud(kScratchDoubleReg, src1, src2);
2770 __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2771 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2772 __ vpxor(dst, dst, kScratchDoubleReg);
2773 break;
2774 }
2775 case kSSEI32x4GeU: {
2776 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2777 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2778 XMMRegister dst = i.OutputSimd128Register();
2779 Operand src = i.InputOperand(1);
2780 __ pminud(dst, src);
2781 __ pcmpeqd(dst, src);
2782 break;
2783 }
2784 case kAVXI32x4GeU: {
2785 CpuFeatureScope avx_scope(tasm(), AVX);
2786 XMMRegister src1 = i.InputSimd128Register(0);
2787 Operand src2 = i.InputOperand(1);
2788 __ vpminud(kScratchDoubleReg, src1, src2);
2789 __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2790 break;
2791 }
2792 case kIA32I32x4Abs: {
2793 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2794 break;
2795 }
2796 case kIA32I32x4BitMask: {
2797 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
2798 break;
2799 }
2800 case kIA32I32x4DotI16x8S: {
2801 __ Pmaddwd(i.OutputSimd128Register(), i.InputSimd128Register(0),
2802 i.InputSimd128Register(1));
2803 break;
2804 }
2805 case kIA32I16x8Splat: {
2806 XMMRegister dst = i.OutputSimd128Register();
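// Movd puts the value in word 0; pshuflw spreads word 0 across the low
// half and pshufd then replicates dword 0 across all four dwords.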
2807 __ Movd(dst, i.InputOperand(0));
2808 __ Pshuflw(dst, dst, 0x0);
2809 __ Pshufd(dst, dst, 0x0);
2810 break;
2811 }
2812 case kIA32I16x8ExtractLaneU: {
2813 Register dst = i.OutputRegister();
2814 __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2815 break;
2816 }
2817 case kIA32I16x8ExtractLaneS: {
2818 Register dst = i.OutputRegister();
2819 __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2820 __ movsx_w(dst, dst);
2821 break;
2822 }
2823 case kSSEI16x8ReplaceLane: {
2824 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2825 __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2826 break;
2827 }
2828 case kAVXI16x8ReplaceLane: {
2829 CpuFeatureScope avx_scope(tasm(), AVX);
2830 __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2831 i.InputOperand(2), i.InputInt8(1));
2832 break;
2833 }
2834 case kIA32I16x8SConvertI8x16Low: {
2835 __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
2836 break;
2837 }
2838 case kIA32I16x8SConvertI8x16High: {
2839 XMMRegister dst = i.OutputSimd128Register();
2840 __ Palignr(dst, i.InputOperand(0), 8);
2841 __ Pmovsxbw(dst, dst);
2842 break;
2843 }
2844 case kIA32I16x8Neg: {
2845 XMMRegister dst = i.OutputSimd128Register();
2846 Operand src = i.InputOperand(0);
2847 if (src.is_reg(dst)) {
2848 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2849 __ Psignw(dst, kScratchDoubleReg);
2850 } else {
2851 __ Pxor(dst, dst);
2852 __ Psubw(dst, src);
2853 }
2854 break;
2855 }
2856 case kIA32I16x8Shl: {
2857 ASSEMBLE_SIMD_SHIFT(Psllw, 4);
2858 break;
2859 }
2860 case kIA32I16x8ShrS: {
2861 ASSEMBLE_SIMD_SHIFT(Psraw, 4);
2862 break;
2863 }
2864 case kSSEI16x8SConvertI32x4: {
2865 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2866 __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2867 break;
2868 }
2869 case kAVXI16x8SConvertI32x4: {
2870 CpuFeatureScope avx_scope(tasm(), AVX);
2871 __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2872 i.InputOperand(1));
2873 break;
2874 }
2875 case kSSEI16x8Add: {
2876 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2877 __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
2878 break;
2879 }
2880 case kAVXI16x8Add: {
2881 CpuFeatureScope avx_scope(tasm(), AVX);
2882 __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2883 i.InputOperand(1));
2884 break;
2885 }
2886 case kSSEI16x8AddSatS: {
2887 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2888 __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
2889 break;
2890 }
2891 case kAVXI16x8AddSatS: {
2892 CpuFeatureScope avx_scope(tasm(), AVX);
2893 __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2894 i.InputOperand(1));
2895 break;
2896 }
2897 case kSSEI16x8AddHoriz: {
2898 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2899 CpuFeatureScope sse_scope(tasm(), SSSE3);
2900 __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
2901 break;
2902 }
2903 case kAVXI16x8AddHoriz: {
2904 CpuFeatureScope avx_scope(tasm(), AVX);
2905 __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2906 i.InputOperand(1));
2907 break;
2908 }
2909 case kSSEI16x8Sub: {
2910 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2911 __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
2912 break;
2913 }
2914 case kAVXI16x8Sub: {
2915 CpuFeatureScope avx_scope(tasm(), AVX);
2916 __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2917 i.InputOperand(1));
2918 break;
2919 }
2920 case kSSEI16x8SubSatS: {
2921 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2922 __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
2923 break;
2924 }
2925 case kAVXI16x8SubSatS: {
2926 CpuFeatureScope avx_scope(tasm(), AVX);
2927 __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2928 i.InputOperand(1));
2929 break;
2930 }
2931 case kSSEI16x8Mul: {
2932 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2933 __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
2934 break;
2935 }
2936 case kAVXI16x8Mul: {
2937 CpuFeatureScope avx_scope(tasm(), AVX);
2938 __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2939 i.InputOperand(1));
2940 break;
2941 }
2942 case kSSEI16x8MinS: {
2943 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2944 __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
2945 break;
2946 }
2947 case kAVXI16x8MinS: {
2948 CpuFeatureScope avx_scope(tasm(), AVX);
2949 __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2950 i.InputOperand(1));
2951 break;
2952 }
2953 case kSSEI16x8MaxS: {
2954 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2955 __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
2956 break;
2957 }
2958 case kAVXI16x8MaxS: {
2959 CpuFeatureScope avx_scope(tasm(), AVX);
2960 __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2961 i.InputOperand(1));
2962 break;
2963 }
2964 case kSSEI16x8Eq: {
2965 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2966 __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
2967 break;
2968 }
2969 case kAVXI16x8Eq: {
2970 CpuFeatureScope avx_scope(tasm(), AVX);
2971 __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2972 i.InputOperand(1));
2973 break;
2974 }
2975 case kSSEI16x8Ne: {
2976 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2977 __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
2978 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2979 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2980 break;
2981 }
2982 case kAVXI16x8Ne: {
2983 CpuFeatureScope avx_scope(tasm(), AVX);
2984 __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2985 i.InputOperand(1));
2986 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2987 __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2988 kScratchDoubleReg);
2989 break;
2990 }
2991 case kSSEI16x8GtS: {
2992 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2993 __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
2994 break;
2995 }
2996 case kAVXI16x8GtS: {
2997 CpuFeatureScope avx_scope(tasm(), AVX);
2998 __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2999 i.InputOperand(1));
3000 break;
3001 }
3002 case kSSEI16x8GeS: {
3003 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3004 XMMRegister dst = i.OutputSimd128Register();
3005 Operand src = i.InputOperand(1);
3006 __ pminsw(dst, src);
3007 __ pcmpeqw(dst, src);
3008 break;
3009 }
3010 case kAVXI16x8GeS: {
3011 CpuFeatureScope avx_scope(tasm(), AVX);
3012 XMMRegister src1 = i.InputSimd128Register(0);
3013 Operand src2 = i.InputOperand(1);
3014 __ vpminsw(kScratchDoubleReg, src1, src2);
3015 __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3016 break;
3017 }
3018 case kIA32I16x8UConvertI8x16Low: {
3019 __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
3020 break;
3021 }
3022 case kIA32I16x8UConvertI8x16High: {
3023 XMMRegister dst = i.OutputSimd128Register();
3024 __ Palignr(dst, i.InputOperand(0), 8);
3025 __ Pmovzxbw(dst, dst);
3026 break;
3027 }
3028 case kIA32I16x8ShrU: {
3029 ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
3030 break;
3031 }
3032 case kSSEI16x8UConvertI32x4: {
3033 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3034 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3035 __ packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3036 break;
3037 }
3038 case kAVXI16x8UConvertI32x4: {
3039 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3040 CpuFeatureScope avx_scope(tasm(), AVX);
3041 XMMRegister dst = i.OutputSimd128Register();
3042 __ vpackusdw(dst, dst, i.InputSimd128Register(1));
3043 break;
3044 }
3045 case kSSEI16x8AddSatU: {
3046 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3047 __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
3048 break;
3049 }
3050 case kAVXI16x8AddSatU: {
3051 CpuFeatureScope avx_scope(tasm(), AVX);
3052 __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3053 i.InputOperand(1));
3054 break;
3055 }
3056 case kSSEI16x8SubSatU: {
3057 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3058 __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
3059 break;
3060 }
3061 case kAVXI16x8SubSatU: {
3062 CpuFeatureScope avx_scope(tasm(), AVX);
3063 __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3064 i.InputOperand(1));
3065 break;
3066 }
3067 case kSSEI16x8MinU: {
3068 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3069 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3070 __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
3071 break;
3072 }
3073 case kAVXI16x8MinU: {
3074 CpuFeatureScope avx_scope(tasm(), AVX);
3075 __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3076 i.InputOperand(1));
3077 break;
3078 }
3079 case kSSEI16x8MaxU: {
3080 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3081 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3082 __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
3083 break;
3084 }
3085 case kAVXI16x8MaxU: {
3086 CpuFeatureScope avx_scope(tasm(), AVX);
3087 __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3088 i.InputOperand(1));
3089 break;
3090 }
3091 case kSSEI16x8GtU: {
3092 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3093 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3094 XMMRegister dst = i.OutputSimd128Register();
3095 Operand src = i.InputOperand(1);
3096 __ pmaxuw(dst, src);
3097 __ pcmpeqw(dst, src);
3098 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3099 __ pxor(dst, kScratchDoubleReg);
3100 break;
3101 }
3102 case kAVXI16x8GtU: {
3103 CpuFeatureScope avx_scope(tasm(), AVX);
3104 XMMRegister dst = i.OutputSimd128Register();
3105 XMMRegister src1 = i.InputSimd128Register(0);
3106 Operand src2 = i.InputOperand(1);
3107 __ vpmaxuw(kScratchDoubleReg, src1, src2);
3108 __ vpcmpeqw(dst, kScratchDoubleReg, src2);
3109 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3110 __ vpxor(dst, dst, kScratchDoubleReg);
3111 break;
3112 }
3113 case kSSEI16x8GeU: {
3114 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3115 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3116 XMMRegister dst = i.OutputSimd128Register();
3117 Operand src = i.InputOperand(1);
3118 __ pminuw(dst, src);
3119 __ pcmpeqw(dst, src);
3120 break;
3121 }
3122 case kAVXI16x8GeU: {
3123 CpuFeatureScope avx_scope(tasm(), AVX);
3124 XMMRegister src1 = i.InputSimd128Register(0);
3125 Operand src2 = i.InputOperand(1);
3126 __ vpminuw(kScratchDoubleReg, src1, src2);
3127 __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3128 break;
3129 }
3130 case kIA32I16x8RoundingAverageU: {
3131 __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(0),
3132 i.InputOperand(1));
3133 break;
3134 }
3135 case kIA32I16x8Abs: {
3136 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3137 break;
3138 }
3139 case kIA32I16x8BitMask: {
3140 Register dst = i.OutputRegister();
3141 XMMRegister tmp = i.TempSimd128Register(0);
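// packsswb keeps each word's sign in the packed byte; pmovmskb gathers 16
// byte sign bits and the shift drops the 8 that came from tmp's stale
// low half.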
3142 __ Packsswb(tmp, i.InputSimd128Register(0));
3143 __ Pmovmskb(dst, tmp);
3144 __ shr(dst, 8);
3145 break;
3146 }
3147 case kIA32I8x16Splat: {
3148 XMMRegister dst = i.OutputSimd128Register();
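// Broadcast byte 0: pshufb with an all-zero shuffle mask selects byte 0
// for every destination byte.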
3149 __ Movd(dst, i.InputOperand(0));
3150 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3151 __ Pshufb(dst, kScratchDoubleReg);
3152 break;
3153 }
3154 case kIA32I8x16ExtractLaneU: {
3155 Register dst = i.OutputRegister();
3156 __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3157 break;
3158 }
3159 case kIA32I8x16ExtractLaneS: {
3160 Register dst = i.OutputRegister();
3161 __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3162 __ movsx_b(dst, dst);
3163 break;
3164 }
3165 case kSSEI8x16ReplaceLane: {
3166 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3167 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3168 __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
3169 break;
3170 }
3171 case kAVXI8x16ReplaceLane: {
3172 CpuFeatureScope avx_scope(tasm(), AVX);
3173 __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3174 i.InputOperand(2), i.InputInt8(1));
3175 break;
3176 }
3177 case kSSEI8x16SConvertI16x8: {
3178 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3179 __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
3180 break;
3181 }
3182 case kAVXI8x16SConvertI16x8: {
3183 CpuFeatureScope avx_scope(tasm(), AVX);
3184 __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3185 i.InputOperand(1));
3186 break;
3187 }
3188 case kIA32I8x16Neg: {
3189 XMMRegister dst = i.OutputSimd128Register();
3190 Operand src = i.InputOperand(0);
3191 if (src.is_reg(dst)) {
3192 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3193 __ Psignb(dst, kScratchDoubleReg);
3194 } else {
3195 __ Pxor(dst, dst);
3196 __ Psubb(dst, src);
3197 }
3198 break;
3199 }
3200 case kIA32I8x16Shl: {
3201 XMMRegister dst = i.OutputSimd128Register();
3202 DCHECK_EQ(dst, i.InputSimd128Register(0));
3203 Register tmp = i.ToRegister(instr->TempAt(0));
3204 XMMRegister tmp_simd = i.TempSimd128Register(1);
3205
3206 if (HasImmediateInput(instr, 1)) {
3207 // Perform 16-bit shift, then mask away low bits.
3208 uint8_t shift = i.InputInt3(1);
3209 __ Psllw(dst, dst, byte{shift});
3210
3211 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
3212 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3213 __ mov(tmp, mask);
3214 __ Movd(tmp_simd, tmp);
3215 __ Pshufd(tmp_simd, tmp_simd, 0);
3216 __ Pand(dst, tmp_simd);
3217 } else {
3218 // Take shift value modulo 8.
3219 __ mov(tmp, i.InputRegister(1));
3220 __ and_(tmp, 7);
3221 // Mask off the unwanted bits before word-shifting.
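// 0xFFFF >>u (shift + 8) leaves the low (8 - shift) bits of each word;
// packuswb turns that into a per-byte mask, so the AND clears exactly the
// bits a left shift would spill into the next byte.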
3222 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3223 __ add(tmp, Immediate(8));
3224 __ Movd(tmp_simd, tmp);
3225 __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
3226 __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
3227 __ Pand(dst, kScratchDoubleReg);
3228 // TODO(zhin): sub here to avoid asking for another temporary register,
3229 // examine codegen for other i8x16 shifts, they use less instructions.
3230 __ sub(tmp, Immediate(8));
3231 __ Movd(tmp_simd, tmp);
3232 __ Psllw(dst, dst, tmp_simd);
3233 }
3234 break;
3235 }
3236 case kIA32I8x16ShrS: {
3237 XMMRegister dst = i.OutputSimd128Register();
3238 DCHECK_EQ(dst, i.InputSimd128Register(0));
3239 if (HasImmediateInput(instr, 1)) {
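// Widen bytes to words so a 16-bit arithmetic shift by (imm + 8) both
// shifts and sign-extends each byte, then re-narrow with signed
// saturation.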
3240 __ Punpckhbw(kScratchDoubleReg, dst);
3241 __ Punpcklbw(dst, dst);
3242 uint8_t shift = i.InputInt3(1) + 8;
3243 __ Psraw(kScratchDoubleReg, shift);
3244 __ Psraw(dst, shift);
3245 __ Packsswb(dst, kScratchDoubleReg);
3246 } else {
3247 Register tmp = i.ToRegister(instr->TempAt(0));
3248 XMMRegister tmp_simd = i.TempSimd128Register(1);
3249 // Unpack the bytes into words, do arithmetic shifts, and repack.
3250 __ Punpckhbw(kScratchDoubleReg, dst);
3251 __ Punpcklbw(dst, dst);
3252 __ mov(tmp, i.InputRegister(1));
3253 // Take shift value modulo 8.
3254 __ and_(tmp, 7);
3255 __ add(tmp, Immediate(8));
3256 __ Movd(tmp_simd, tmp);
3257 __ Psraw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
3258 __ Psraw(dst, dst, tmp_simd);
3259 __ Packsswb(dst, kScratchDoubleReg);
3260 }
3261 break;
3262 }
3263 case kSSEI8x16Add: {
3264 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3265 __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
3266 break;
3267 }
3268 case kAVXI8x16Add: {
3269 CpuFeatureScope avx_scope(tasm(), AVX);
3270 __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3271 i.InputOperand(1));
3272 break;
3273 }
3274 case kSSEI8x16AddSatS: {
3275 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3276 __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
3277 break;
3278 }
3279 case kAVXI8x16AddSatS: {
3280 CpuFeatureScope avx_scope(tasm(), AVX);
3281 __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3282 i.InputOperand(1));
3283 break;
3284 }
3285 case kSSEI8x16Sub: {
3286 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3287 __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
3288 break;
3289 }
3290 case kAVXI8x16Sub: {
3291 CpuFeatureScope avx_scope(tasm(), AVX);
3292 __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3293 i.InputOperand(1));
3294 break;
3295 }
3296 case kSSEI8x16SubSatS: {
3297 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3298 __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
3299 break;
3300 }
3301 case kAVXI8x16SubSatS: {
3302 CpuFeatureScope avx_scope(tasm(), AVX);
3303 __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3304 i.InputOperand(1));
3305 break;
3306 }
3307 case kSSEI8x16Mul: {
3308 XMMRegister dst = i.OutputSimd128Register();
3309 DCHECK_EQ(dst, i.InputSimd128Register(0));
3310 XMMRegister right = i.InputSimd128Register(1);
3311 XMMRegister tmp = i.TempSimd128Register(0);
3312
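      // There is no byte-granularity SIMD multiply on IA-32, so the product
      // is assembled from two 16-bit multiplies of the high and low bytes of
      // each word.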
3313 // I16x8 view of I8x16
3314 // left = AAaa AAaa ... AAaa AAaa
3315 // right= BBbb BBbb ... BBbb BBbb
3316
3317 // t = 00AA 00AA ... 00AA 00AA
3318 // s = 00BB 00BB ... 00BB 00BB
3319 __ movaps(tmp, dst);
3320 __ movaps(kScratchDoubleReg, right);
3321 __ psrlw(tmp, 8);
3322 __ psrlw(kScratchDoubleReg, 8);
3323 // dst = left * 256
3324 __ psllw(dst, 8);
3325
3326 // t = I16x8Mul(t, s)
3327 // => __PP __PP ... __PP __PP
3328 __ pmullw(tmp, kScratchDoubleReg);
3329 // dst = I16x8Mul(left * 256, right)
3330 // => pp__ pp__ ... pp__ pp__
3331 __ pmullw(dst, right);
3332
3333 // t = I16x8Shl(t, 8)
3334 // => PP00 PP00 ... PP00 PP00
3335 __ psllw(tmp, 8);
3336
3337 // dst = I16x8Shr(dst, 8)
3338 // => 00pp 00pp ... 00pp 00pp
3339 __ psrlw(dst, 8);
3340
3341 // dst = I16x8Or(dst, t)
3342 // => PPpp PPpp ... PPpp PPpp
3343 __ por(dst, tmp);
3344 break;
3345 }
3346 case kAVXI8x16Mul: {
3347 CpuFeatureScope avx_scope(tasm(), AVX);
3348 XMMRegister dst = i.OutputSimd128Register();
3349 XMMRegister left = i.InputSimd128Register(0);
3350 XMMRegister right = i.InputSimd128Register(1);
3351 XMMRegister tmp = i.TempSimd128Register(0);
3352
3353 // I16x8 view of I8x16
3354 // left = AAaa AAaa ... AAaa AAaa
3355 // right= BBbb BBbb ... BBbb BBbb
3356
3357 // t = 00AA 00AA ... 00AA 00AA
3358 // s = 00BB 00BB ... 00BB 00BB
3359 __ vpsrlw(tmp, left, 8);
3360 __ vpsrlw(kScratchDoubleReg, right, 8);
3361
3362       // t = I16x8Mul(t, s)
3363 // => __PP __PP ... __PP __PP
3364 __ vpmullw(tmp, tmp, kScratchDoubleReg);
3365
3366 // s = left * 256
3367 __ vpsllw(kScratchDoubleReg, left, 8);
3368
3369 // dst = I16x8Mul(left * 256, right)
3370 // => pp__ pp__ ... pp__ pp__
3371 __ vpmullw(dst, kScratchDoubleReg, right);
3372
3373 // dst = I16x8Shr(dst, 8)
3374 // => 00pp 00pp ... 00pp 00pp
3375 __ vpsrlw(dst, dst, 8);
3376
3377 // t = I16x8Shl(t, 8)
3378 // => PP00 PP00 ... PP00 PP00
3379 __ vpsllw(tmp, tmp, 8);
3380
3381 // dst = I16x8Or(dst, t)
3382 // => PPpp PPpp ... PPpp PPpp
3383 __ vpor(dst, dst, tmp);
3384 break;
3385 }
3386 case kSSEI8x16MinS: {
3387 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3388 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3389 __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
3390 break;
3391 }
3392 case kAVXI8x16MinS: {
3393 CpuFeatureScope avx_scope(tasm(), AVX);
3394 __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3395 i.InputOperand(1));
3396 break;
3397 }
3398 case kSSEI8x16MaxS: {
3399 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3400 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3401 __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
3402 break;
3403 }
3404 case kAVXI8x16MaxS: {
3405 CpuFeatureScope avx_scope(tasm(), AVX);
3406 __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3407 i.InputOperand(1));
3408 break;
3409 }
3410 case kSSEI8x16Eq: {
3411 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3412 __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
3413 break;
3414 }
3415 case kAVXI8x16Eq: {
3416 CpuFeatureScope avx_scope(tasm(), AVX);
3417 __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3418 i.InputOperand(1));
3419 break;
3420 }
3421 case kSSEI8x16Ne: {
3422 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
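      // There is no "not equal" byte compare; compute equality, then invert
      // the result with an all-ones xor.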
3423 __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
3424 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3425 __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3426 break;
3427 }
3428 case kAVXI8x16Ne: {
3429 CpuFeatureScope avx_scope(tasm(), AVX);
3430 __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3431 i.InputOperand(1));
3432 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3433 __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
3434 kScratchDoubleReg);
3435 break;
3436 }
3437 case kSSEI8x16GtS: {
3438 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3439 __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
3440 break;
3441 }
3442 case kAVXI8x16GtS: {
3443 CpuFeatureScope avx_scope(tasm(), AVX);
3444 __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3445 i.InputOperand(1));
3446 break;
3447 }
3448 case kSSEI8x16GeS: {
3449 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3450 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3451 XMMRegister dst = i.OutputSimd128Register();
3452 Operand src = i.InputOperand(1);
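      // a >= b iff min(a, b) == b; SSE provides no signed byte
      // greater-or-equal compare.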
3453 __ pminsb(dst, src);
3454 __ pcmpeqb(dst, src);
3455 break;
3456 }
3457 case kAVXI8x16GeS: {
3458 CpuFeatureScope avx_scope(tasm(), AVX);
3459 XMMRegister src1 = i.InputSimd128Register(0);
3460 Operand src2 = i.InputOperand(1);
3461 __ vpminsb(kScratchDoubleReg, src1, src2);
3462 __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3463 break;
3464 }
3465 case kSSEI8x16UConvertI16x8: {
3466 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3467 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3468 XMMRegister dst = i.OutputSimd128Register();
3469 __ packuswb(dst, i.InputOperand(1));
3470 break;
3471 }
3472 case kAVXI8x16UConvertI16x8: {
3473 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3474 CpuFeatureScope avx_scope(tasm(), AVX);
3475 XMMRegister dst = i.OutputSimd128Register();
3476 __ vpackuswb(dst, dst, i.InputOperand(1));
3477 break;
3478 }
3479 case kSSEI8x16AddSatU: {
3480 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3481 __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
3482 break;
3483 }
3484 case kAVXI8x16AddSatU: {
3485 CpuFeatureScope avx_scope(tasm(), AVX);
3486 __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3487 i.InputOperand(1));
3488 break;
3489 }
3490 case kSSEI8x16SubSatU: {
3491 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3492 __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
3493 break;
3494 }
3495 case kAVXI8x16SubSatU: {
3496 CpuFeatureScope avx_scope(tasm(), AVX);
3497 __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3498 i.InputOperand(1));
3499 break;
3500 }
3501 case kIA32I8x16ShrU: {
3502 XMMRegister dst = i.OutputSimd128Register();
3503 DCHECK_EQ(dst, i.InputSimd128Register(0));
3504 Register tmp = i.ToRegister(instr->TempAt(0));
3505 XMMRegister tmp_simd = i.TempSimd128Register(1);
3506
3507 if (HasImmediateInput(instr, 1)) {
3508 // Perform 16-bit shift, then mask away high bits.
3509 uint8_t shift = i.InputInt3(1);
3510 __ Psrlw(dst, dst, byte{shift});
3511
3512 uint8_t bmask = 0xff >> shift;
3513 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3514 __ mov(tmp, mask);
3515 __ Movd(tmp_simd, tmp);
3516 __ Pshufd(tmp_simd, tmp_simd, 0);
3517 __ Pand(dst, tmp_simd);
3518 } else {
3519 // Unpack the bytes into words, do logical shifts, and repack.
3520 __ Punpckhbw(kScratchDoubleReg, dst);
3521 __ Punpcklbw(dst, dst);
3522 __ mov(tmp, i.InputRegister(1));
3523 // Take shift value modulo 8.
3524 __ and_(tmp, 7);
3525 __ add(tmp, Immediate(8));
3526 __ Movd(tmp_simd, tmp);
3527 __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
3528 __ Psrlw(dst, dst, tmp_simd);
3529 __ Packuswb(dst, kScratchDoubleReg);
3530 }
3531 break;
3532 }
3533 case kSSEI8x16MinU: {
3534 XMMRegister dst = i.OutputSimd128Register();
3535 DCHECK_EQ(dst, i.InputSimd128Register(0));
3536 __ pminub(dst, i.InputOperand(1));
3537 break;
3538 }
3539 case kAVXI8x16MinU: {
3540 CpuFeatureScope avx_scope(tasm(), AVX);
3541 __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3542 i.InputOperand(1));
3543 break;
3544 }
3545 case kSSEI8x16MaxU: {
3546 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3547 __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
3548 break;
3549 }
3550 case kAVXI8x16MaxU: {
3551 CpuFeatureScope avx_scope(tasm(), AVX);
3552 __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3553 i.InputOperand(1));
3554 break;
3555 }
3556 case kSSEI8x16GtU: {
3557 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3558 XMMRegister dst = i.OutputSimd128Register();
3559 Operand src = i.InputOperand(1);
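      // SSE has no unsigned byte compare: max(a, b) == b iff a <= b, so
      // computing that equality and inverting it yields a > b.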
3560 __ pmaxub(dst, src);
3561 __ pcmpeqb(dst, src);
3562 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3563 __ pxor(dst, kScratchDoubleReg);
3564 break;
3565 }
3566 case kAVXI8x16GtU: {
3567 CpuFeatureScope avx_scope(tasm(), AVX);
3568 XMMRegister dst = i.OutputSimd128Register();
3569 XMMRegister src1 = i.InputSimd128Register(0);
3570 Operand src2 = i.InputOperand(1);
3571 __ vpmaxub(kScratchDoubleReg, src1, src2);
3572 __ vpcmpeqb(dst, kScratchDoubleReg, src2);
3573 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3574 __ vpxor(dst, dst, kScratchDoubleReg);
3575 break;
3576 }
3577 case kSSEI8x16GeU: {
3578 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3579 XMMRegister dst = i.OutputSimd128Register();
3580 Operand src = i.InputOperand(1);
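      // min(a, b) == b iff a >= b (unsigned), so no inversion is needed here.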
3581 __ pminub(dst, src);
3582 __ pcmpeqb(dst, src);
3583 break;
3584 }
3585 case kAVXI8x16GeU: {
3586 CpuFeatureScope avx_scope(tasm(), AVX);
3587 XMMRegister src1 = i.InputSimd128Register(0);
3588 Operand src2 = i.InputOperand(1);
3589 __ vpminub(kScratchDoubleReg, src1, src2);
3590 __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3591 break;
3592 }
3593 case kIA32I8x16RoundingAverageU: {
3594 __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3595 i.InputOperand(1));
3596 break;
3597 }
3598 case kIA32I8x16Abs: {
3599 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3600 break;
3601 }
3602 case kIA32I8x16BitMask: {
3603 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3604 break;
3605 }
3606 case kIA32S128Const: {
3607 XMMRegister dst = i.OutputSimd128Register();
3608 Register tmp = i.TempRegister(0);
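      // Assemble the 128-bit constant from four 32-bit immediates: the low
      // qword with a single Move, then the two high lanes with pinsrd.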
3609 uint64_t low_qword = make_uint64(i.InputUint32(1), i.InputUint32(0));
3610 __ Move(dst, low_qword);
3611 __ Move(tmp, Immediate(i.InputUint32(2)));
3612 __ Pinsrd(dst, tmp, 2);
3613 __ Move(tmp, Immediate(i.InputUint32(3)));
3614 __ Pinsrd(dst, tmp, 3);
3615 break;
3616 }
3617 case kIA32S128Zero: {
3618 XMMRegister dst = i.OutputSimd128Register();
3619 __ Pxor(dst, dst);
3620 break;
3621 }
3622 case kIA32S128AllOnes: {
3623 XMMRegister dst = i.OutputSimd128Register();
3624 __ Pcmpeqd(dst, dst);
3625 break;
3626 }
3627 case kSSES128Not: {
3628 XMMRegister dst = i.OutputSimd128Register();
3629 DCHECK_EQ(dst, i.InputSimd128Register(0));
3630 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3631 __ pxor(dst, kScratchDoubleReg);
3632 break;
3633 }
3634 case kAVXS128Not: {
3635 CpuFeatureScope avx_scope(tasm(), AVX);
3636 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3637 __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
3638 break;
3639 }
3640 case kSSES128And: {
3641 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3642 __ pand(i.OutputSimd128Register(), i.InputOperand(1));
3643 break;
3644 }
3645 case kAVXS128And: {
3646 CpuFeatureScope avx_scope(tasm(), AVX);
3647 __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
3648 i.InputOperand(1));
3649 break;
3650 }
3651 case kSSES128Or: {
3652 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3653 __ por(i.OutputSimd128Register(), i.InputOperand(1));
3654 break;
3655 }
3656 case kAVXS128Or: {
3657 CpuFeatureScope avx_scope(tasm(), AVX);
3658 __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3659 i.InputOperand(1));
3660 break;
3661 }
3662 case kSSES128Xor: {
3663 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3664 __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
3665 break;
3666 }
3667 case kAVXS128Xor: {
3668 CpuFeatureScope avx_scope(tasm(), AVX);
3669 __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3670 i.InputOperand(1));
3671 break;
3672 }
3673 case kSSES128Select: {
3674 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3675 // Mask used here is stored in dst.
3676 XMMRegister dst = i.OutputSimd128Register();
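      // Bitwise select: dst = (mask & (src1 ^ src2)) ^ src2 keeps src1 bits
      // where the mask is set and src2 bits where it is clear.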
3677 __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3678 __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3679 __ andps(dst, kScratchDoubleReg);
3680 __ xorps(dst, i.InputSimd128Register(2));
3681 break;
3682 }
3683 case kAVXS128Select: {
3684 CpuFeatureScope avx_scope(tasm(), AVX);
3685 XMMRegister dst = i.OutputSimd128Register();
3686 __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
3687 i.InputOperand(1));
3688 __ vandps(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(0));
3689 __ vxorps(dst, kScratchDoubleReg, i.InputSimd128Register(2));
3690 break;
3691 }
3692 case kIA32S128AndNot: {
3693 XMMRegister dst = i.OutputSimd128Register();
3694 DCHECK_EQ(dst, i.InputSimd128Register(0));
3695       // The inputs have been inverted by the instruction selector, so we can
3696       // use andnps here directly, without any modifications.
3697 XMMRegister src1 = i.InputSimd128Register(1);
3698 __ Andnps(dst, src1);
3699 break;
3700 }
3701 case kIA32I8x16Swizzle: {
3702 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3703 XMMRegister dst = i.OutputSimd128Register();
3704 XMMRegister mask = i.TempSimd128Register(0);
3705
3706       // Out-of-range indices should return 0; add 112 so that any value > 15
3707       // becomes at least 128 (top bit set), making pshufb zero that lane.
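      // e.g. index 5 becomes 117 (0x75): the top bit stays clear and pshufb
      // still selects byte 5 from the low four bits, while index 16 becomes
      // 128 (and anything larger saturates toward 255), zeroing the lane.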
3708 __ Move(mask, uint32_t{0x70707070});
3709 __ Pshufd(mask, mask, 0x0);
3710 __ Paddusb(mask, i.InputSimd128Register(1));
3711 __ Pshufb(dst, mask);
3712 break;
3713 }
3714 case kIA32I8x16Shuffle: {
3715 XMMRegister dst = i.OutputSimd128Register();
3716 Operand src0 = i.InputOperand(0);
3717 Register tmp = i.TempRegister(0);
3718       // Prepare a 16-byte aligned buffer for the shuffle control mask.
3719 __ mov(tmp, esp);
3720 __ and_(esp, -16);
3721 if (instr->InputCount() == 5) { // only one input operand
3722 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3723 for (int j = 4; j > 0; j--) {
3724 uint32_t mask = i.InputUint32(j);
3725 __ push(Immediate(mask));
3726 }
3727 __ Pshufb(dst, Operand(esp, 0));
3728 } else { // two input operands
3729 DCHECK_EQ(6, instr->InputCount());
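        // Build two pshufb masks: the first selects the lanes that come from
        // src0 (lanes belonging to src1 become 0x80, which pshufb zeroes),
        // the second selects the lanes from src1; por merges the two
        // half-shuffles.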
3730 __ movups(kScratchDoubleReg, src0);
3731 for (int j = 5; j > 1; j--) {
3732 uint32_t lanes = i.InputUint32(j);
3733 uint32_t mask = 0;
3734 for (int k = 0; k < 32; k += 8) {
3735 uint8_t lane = lanes >> k;
3736 mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3737 }
3738 __ push(Immediate(mask));
3739 }
3740 __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3741 Operand src1 = i.InputOperand(1);
3742 if (!src1.is_reg(dst)) __ movups(dst, src1);
3743 for (int j = 5; j > 1; j--) {
3744 uint32_t lanes = i.InputUint32(j);
3745 uint32_t mask = 0;
3746 for (int k = 0; k < 32; k += 8) {
3747 uint8_t lane = lanes >> k;
3748 mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3749 }
3750 __ push(Immediate(mask));
3751 }
3752 __ Pshufb(dst, Operand(esp, 0));
3753 __ por(dst, kScratchDoubleReg);
3754 }
3755 __ mov(esp, tmp);
3756 break;
3757 }
3758 case kIA32S128Load8Splat: {
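      // Load the byte into lane 0, then broadcast it: pshufb with an all-zero
      // shuffle mask copies lane 0 into every lane.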
3759 __ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3760 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3761 __ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
3762 break;
3763 }
3764 case kIA32S128Load16Splat: {
3765 __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3766 __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
3767 uint8_t{0});
3768 __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
3769 break;
3770 }
3771 case kIA32S128Load32Splat: {
3772 __ Vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
3773 break;
3774 }
3775 case kIA32S128Load64Splat: {
3776 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3777 break;
3778 }
3779 case kIA32S128Load8x8S: {
3780 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3781 break;
3782 }
3783 case kIA32S128Load8x8U: {
3784 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3785 break;
3786 }
3787 case kIA32S128Load16x4S: {
3788 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3789 break;
3790 }
3791 case kIA32S128Load16x4U: {
3792 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3793 break;
3794 }
3795 case kIA32S128Load32x2S: {
3796 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3797 break;
3798 }
3799 case kIA32S128Load32x2U: {
3800 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3801 break;
3802 }
3803 case kIA32S32x4Swizzle: {
3804 DCHECK_EQ(2, instr->InputCount());
3805 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
3806 break;
3807 }
3808 case kIA32S32x4Shuffle: {
3809 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3810 int8_t shuffle = i.InputInt8(2);
3811 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3812 __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3813 __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3814 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3815 break;
3816 }
3817 case kIA32S16x8Blend:
3818 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3819 break;
3820 case kIA32S16x8HalfShuffle1: {
3821 XMMRegister dst = i.OutputSimd128Register();
3822 __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
3823 __ Pshufhw(dst, dst, i.InputInt8(2));
3824 break;
3825 }
3826 case kIA32S16x8HalfShuffle2: {
3827 XMMRegister dst = i.OutputSimd128Register();
3828 __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
3829 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3830 __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
3831 __ Pshufhw(dst, dst, i.InputInt8(3));
3832 __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3833 break;
3834 }
3835 case kIA32S8x16Alignr:
3836 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3837 break;
3838 case kIA32S16x8Dup: {
3839 XMMRegister dst = i.OutputSimd128Register();
3840 Operand src = i.InputOperand(0);
3841 int8_t lane = i.InputInt8(1) & 0x7;
3842 int8_t lane4 = lane & 0x3;
3843 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
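      // half_dup replicates the 2-bit lane index into all four fields of the
      // pshuflw/pshufhw immediate, duplicating the word within one half; the
      // final pshufd then broadcasts a dword of that half across the register.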
3844 if (lane < 4) {
3845 __ Pshuflw(dst, src, half_dup);
3846 __ Pshufd(dst, dst, 0);
3847 } else {
3848 __ Pshufhw(dst, src, half_dup);
3849 __ Pshufd(dst, dst, 0xaa);
3850 }
3851 break;
3852 }
3853 case kIA32S8x16Dup: {
3854 XMMRegister dst = i.OutputSimd128Register();
3855 XMMRegister src = i.InputSimd128Register(0);
3856 int8_t lane = i.InputInt8(1) & 0xf;
3857 if (CpuFeatures::IsSupported(AVX)) {
3858 CpuFeatureScope avx_scope(tasm(), AVX);
3859 if (lane < 8) {
3860 __ vpunpcklbw(dst, src, src);
3861 } else {
3862 __ vpunpckhbw(dst, src, src);
3863 }
3864 } else {
3865 DCHECK_EQ(dst, src);
3866 if (lane < 8) {
3867 __ punpcklbw(dst, dst);
3868 } else {
3869 __ punpckhbw(dst, dst);
3870 }
3871 }
3872 lane &= 0x7;
3873 int8_t lane4 = lane & 0x3;
3874 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3875 if (lane < 4) {
3876 __ Pshuflw(dst, dst, half_dup);
3877 __ Pshufd(dst, dst, 0);
3878 } else {
3879 __ Pshufhw(dst, dst, half_dup);
3880 __ Pshufd(dst, dst, 0xaa);
3881 }
3882 break;
3883 }
3884 case kIA32S64x2UnpackHigh:
3885 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3886 break;
3887 case kIA32S32x4UnpackHigh:
3888 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3889 break;
3890 case kIA32S16x8UnpackHigh:
3891 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3892 break;
3893 case kIA32S8x16UnpackHigh:
3894 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3895 break;
3896 case kIA32S64x2UnpackLow:
3897 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3898 break;
3899 case kIA32S32x4UnpackLow:
3900 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3901 break;
3902 case kIA32S16x8UnpackLow:
3903 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3904 break;
3905 case kIA32S8x16UnpackLow:
3906 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3907 break;
3908 case kSSES16x8UnzipHigh: {
3909 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3910 XMMRegister dst = i.OutputSimd128Register();
3911 XMMRegister src2 = dst;
3912 DCHECK_EQ(dst, i.InputSimd128Register(0));
3913 if (instr->InputCount() == 2) {
3914 __ movups(kScratchDoubleReg, i.InputOperand(1));
3915 __ psrld(kScratchDoubleReg, 16);
3916 src2 = kScratchDoubleReg;
3917 }
3918 __ psrld(dst, 16);
3919 __ packusdw(dst, src2);
3920 break;
3921 }
3922 case kAVXS16x8UnzipHigh: {
3923 CpuFeatureScope avx_scope(tasm(), AVX);
3924 XMMRegister dst = i.OutputSimd128Register();
3925 XMMRegister src2 = dst;
3926 if (instr->InputCount() == 2) {
3927 __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3928 src2 = kScratchDoubleReg;
3929 }
3930 __ vpsrld(dst, i.InputSimd128Register(0), 16);
3931 __ vpackusdw(dst, dst, src2);
3932 break;
3933 }
3934 case kSSES16x8UnzipLow: {
3935 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3936 XMMRegister dst = i.OutputSimd128Register();
3937 XMMRegister src2 = dst;
3938 DCHECK_EQ(dst, i.InputSimd128Register(0));
3939 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3940 if (instr->InputCount() == 2) {
3941 __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3942 src2 = kScratchDoubleReg;
3943 }
3944 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3945 __ packusdw(dst, src2);
3946 break;
3947 }
3948 case kAVXS16x8UnzipLow: {
3949 CpuFeatureScope avx_scope(tasm(), AVX);
3950 XMMRegister dst = i.OutputSimd128Register();
3951 XMMRegister src2 = dst;
3952 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3953 if (instr->InputCount() == 2) {
3954 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3955 0x55);
3956 src2 = kScratchDoubleReg;
3957 }
3958 __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3959 __ vpackusdw(dst, dst, src2);
3960 break;
3961 }
3962 case kSSES8x16UnzipHigh: {
3963 XMMRegister dst = i.OutputSimd128Register();
3964 XMMRegister src2 = dst;
3965 DCHECK_EQ(dst, i.InputSimd128Register(0));
3966 if (instr->InputCount() == 2) {
3967 __ movups(kScratchDoubleReg, i.InputOperand(1));
3968 __ psrlw(kScratchDoubleReg, 8);
3969 src2 = kScratchDoubleReg;
3970 }
3971 __ psrlw(dst, 8);
3972 __ packuswb(dst, src2);
3973 break;
3974 }
3975 case kAVXS8x16UnzipHigh: {
3976 CpuFeatureScope avx_scope(tasm(), AVX);
3977 XMMRegister dst = i.OutputSimd128Register();
3978 XMMRegister src2 = dst;
3979 if (instr->InputCount() == 2) {
3980 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3981 src2 = kScratchDoubleReg;
3982 }
3983 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3984 __ vpackuswb(dst, dst, src2);
3985 break;
3986 }
3987 case kSSES8x16UnzipLow: {
3988 XMMRegister dst = i.OutputSimd128Register();
3989 XMMRegister src2 = dst;
3990 DCHECK_EQ(dst, i.InputSimd128Register(0));
3991 if (instr->InputCount() == 2) {
3992 __ movups(kScratchDoubleReg, i.InputOperand(1));
3993 __ psllw(kScratchDoubleReg, 8);
3994 __ psrlw(kScratchDoubleReg, 8);
3995 src2 = kScratchDoubleReg;
3996 }
3997 __ psllw(dst, 8);
3998 __ psrlw(dst, 8);
3999 __ packuswb(dst, src2);
4000 break;
4001 }
4002 case kAVXS8x16UnzipLow: {
4003 CpuFeatureScope avx_scope(tasm(), AVX);
4004 XMMRegister dst = i.OutputSimd128Register();
4005 XMMRegister src2 = dst;
4006 if (instr->InputCount() == 2) {
4007 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
4008 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
4009 src2 = kScratchDoubleReg;
4010 }
4011 __ vpsllw(dst, i.InputSimd128Register(0), 8);
4012 __ vpsrlw(dst, dst, 8);
4013 __ vpackuswb(dst, dst, src2);
4014 break;
4015 }
4016 case kSSES8x16TransposeLow: {
4017 XMMRegister dst = i.OutputSimd128Register();
4018 DCHECK_EQ(dst, i.InputSimd128Register(0));
4019 __ psllw(dst, 8);
4020 if (instr->InputCount() == 1) {
4021 __ movups(kScratchDoubleReg, dst);
4022 } else {
4023 DCHECK_EQ(2, instr->InputCount());
4024 __ movups(kScratchDoubleReg, i.InputOperand(1));
4025 __ psllw(kScratchDoubleReg, 8);
4026 }
4027 __ psrlw(dst, 8);
4028 __ por(dst, kScratchDoubleReg);
4029 break;
4030 }
4031 case kAVXS8x16TransposeLow: {
4032 CpuFeatureScope avx_scope(tasm(), AVX);
4033 XMMRegister dst = i.OutputSimd128Register();
4034 if (instr->InputCount() == 1) {
4035 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
4036 __ vpsrlw(dst, kScratchDoubleReg, 8);
4037 } else {
4038 DCHECK_EQ(2, instr->InputCount());
4039 __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
4040 __ vpsllw(dst, i.InputSimd128Register(0), 8);
4041 __ vpsrlw(dst, dst, 8);
4042 }
4043 __ vpor(dst, dst, kScratchDoubleReg);
4044 break;
4045 }
4046 case kSSES8x16TransposeHigh: {
4047 XMMRegister dst = i.OutputSimd128Register();
4048 DCHECK_EQ(dst, i.InputSimd128Register(0));
4049 __ psrlw(dst, 8);
4050 if (instr->InputCount() == 1) {
4051 __ movups(kScratchDoubleReg, dst);
4052 } else {
4053 DCHECK_EQ(2, instr->InputCount());
4054 __ movups(kScratchDoubleReg, i.InputOperand(1));
4055 __ psrlw(kScratchDoubleReg, 8);
4056 }
4057 __ psllw(kScratchDoubleReg, 8);
4058 __ por(dst, kScratchDoubleReg);
4059 break;
4060 }
4061 case kAVXS8x16TransposeHigh: {
4062 CpuFeatureScope avx_scope(tasm(), AVX);
4063 XMMRegister dst = i.OutputSimd128Register();
4064 if (instr->InputCount() == 1) {
4065 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
4066 __ vpsllw(kScratchDoubleReg, dst, 8);
4067 } else {
4068 DCHECK_EQ(2, instr->InputCount());
4069 __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
4070 __ vpsrlw(dst, i.InputSimd128Register(0), 8);
4071 __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
4072 }
4073 __ vpor(dst, dst, kScratchDoubleReg);
4074 break;
4075 }
4076 case kSSES8x8Reverse:
4077 case kSSES8x4Reverse:
4078 case kSSES8x2Reverse: {
4079 DCHECK_EQ(1, instr->InputCount());
4080 XMMRegister dst = i.OutputSimd128Register();
4081 DCHECK_EQ(dst, i.InputSimd128Register(0));
4082 if (arch_opcode != kSSES8x2Reverse) {
4083 // First shuffle words into position.
4084 int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
4085 __ pshuflw(dst, dst, shuffle_mask);
4086 __ pshufhw(dst, dst, shuffle_mask);
4087 }
4088 __ movaps(kScratchDoubleReg, dst);
4089 __ psrlw(kScratchDoubleReg, 8);
4090 __ psllw(dst, 8);
4091 __ por(dst, kScratchDoubleReg);
4092 break;
4093 }
4094 case kAVXS8x2Reverse:
4095 case kAVXS8x4Reverse:
4096 case kAVXS8x8Reverse: {
4097 DCHECK_EQ(1, instr->InputCount());
4098 CpuFeatureScope avx_scope(tasm(), AVX);
4099 XMMRegister dst = i.OutputSimd128Register();
4100 XMMRegister src = dst;
4101 if (arch_opcode != kAVXS8x2Reverse) {
4102 // First shuffle words into position.
4103 int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
4104 __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
4105 __ vpshufhw(dst, dst, shuffle_mask);
4106 } else {
4107 src = i.InputSimd128Register(0);
4108 }
4109       // Reverse each 16-bit lane.
4110 __ vpsrlw(kScratchDoubleReg, src, 8);
4111 __ vpsllw(dst, src, 8);
4112 __ vpor(dst, dst, kScratchDoubleReg);
4113 break;
4114 }
4115 case kIA32V32x4AnyTrue:
4116 case kIA32V16x8AnyTrue:
4117 case kIA32V8x16AnyTrue: {
4118 Register dst = i.OutputRegister();
4119 XMMRegister src = i.InputSimd128Register(0);
4120 Register tmp = i.TempRegister(0);
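      // dst = src != 0 ? 1 : 0. ptest sets ZF iff the whole vector is zero;
      // the cmov then replaces the preloaded 1 with 0 in that case.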
4121 __ xor_(tmp, tmp);
4122 __ mov(dst, Immediate(1));
4123 __ Ptest(src, src);
4124 __ cmov(zero, dst, tmp);
4125 break;
4126 }
4127     // Need to split up all the different lane structures because the
4128     // comparison instruction used matters: e.g. given the word 0xff00
4129     // compared against zero, pcmpeqb returns 0x00ff while pcmpeqw returns
4130     // 0x0000, so ptest sets ZF to 0 and 1, respectively.
4131 case kIA32V32x4AllTrue:
4132 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
4133 break;
4134 case kIA32V16x8AllTrue:
4135 ASSEMBLE_SIMD_ALL_TRUE(pcmpeqw);
4136 break;
4137 case kIA32V8x16AllTrue: {
4138 ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb);
4139 break;
4140 }
4141 case kIA32Word32AtomicPairLoad: {
4142 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
4143 __ movq(tmp, i.MemoryOperand());
4144 __ Pextrd(i.OutputRegister(0), tmp, 0);
4145 __ Pextrd(i.OutputRegister(1), tmp, 1);
4146 break;
4147 }
4148 case kIA32Word32AtomicPairStore: {
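      // ia32 has no plain atomic 64-bit store, so loop on lock cmpxchg8b:
      // read the current pair, then attempt to exchange in the new value
      // until the location is unchanged between the read and the exchange.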
4149 Label store;
4150 __ bind(&store);
4151 __ mov(i.TempRegister(0), i.MemoryOperand(2));
4152 __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
4153 __ push(ebx);
4154 frame_access_state()->IncreaseSPDelta(1);
4155 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
4156 __ lock();
4157 __ cmpxchg8b(i.MemoryOperand(2));
4158 __ pop(ebx);
4159 frame_access_state()->IncreaseSPDelta(-1);
4160 __ j(not_equal, &store);
4161 break;
4162 }
4163 case kWord32AtomicExchangeInt8: {
4164 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
4165 __ movsx_b(i.InputRegister(0), i.InputRegister(0));
4166 break;
4167 }
4168 case kWord32AtomicExchangeUint8: {
4169 __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
4170 __ movzx_b(i.InputRegister(0), i.InputRegister(0));
4171 break;
4172 }
4173 case kWord32AtomicExchangeInt16: {
4174 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
4175 __ movsx_w(i.InputRegister(0), i.InputRegister(0));
4176 break;
4177 }
4178 case kWord32AtomicExchangeUint16: {
4179 __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
4180 __ movzx_w(i.InputRegister(0), i.InputRegister(0));
4181 break;
4182 }
4183 case kWord32AtomicExchangeWord32: {
4184 __ xchg(i.InputRegister(0), i.MemoryOperand(1));
4185 break;
4186 }
4187 case kIA32Word32AtomicPairExchange: {
4188 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
4189 Label exchange;
4190 __ bind(&exchange);
4191 __ mov(eax, i.MemoryOperand(2));
4192 __ mov(edx, i.NextMemoryOperand(2));
4193 __ push(ebx);
4194 frame_access_state()->IncreaseSPDelta(1);
4195 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
4196 __ lock();
4197 __ cmpxchg8b(i.MemoryOperand(2));
4198 __ pop(ebx);
4199 frame_access_state()->IncreaseSPDelta(-1);
4200 __ j(not_equal, &exchange);
4201 break;
4202 }
4203 case kWord32AtomicCompareExchangeInt8: {
4204 __ lock();
4205 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
4206 __ movsx_b(eax, eax);
4207 break;
4208 }
4209 case kWord32AtomicCompareExchangeUint8: {
4210 __ lock();
4211 __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
4212 __ movzx_b(eax, eax);
4213 break;
4214 }
4215 case kWord32AtomicCompareExchangeInt16: {
4216 __ lock();
4217 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
4218 __ movsx_w(eax, eax);
4219 break;
4220 }
4221 case kWord32AtomicCompareExchangeUint16: {
4222 __ lock();
4223 __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
4224 __ movzx_w(eax, eax);
4225 break;
4226 }
4227 case kWord32AtomicCompareExchangeWord32: {
4228 __ lock();
4229 __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
4230 break;
4231 }
4232 case kIA32Word32AtomicPairCompareExchange: {
4233 __ push(ebx);
4234 frame_access_state()->IncreaseSPDelta(1);
4235 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
4236 __ lock();
4237 __ cmpxchg8b(i.MemoryOperand(4));
4238 __ pop(ebx);
4239 frame_access_state()->IncreaseSPDelta(-1);
4240 break;
4241 }
4242 #define ATOMIC_BINOP_CASE(op, inst) \
4243 case kWord32Atomic##op##Int8: { \
4244 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
4245 __ movsx_b(eax, eax); \
4246 break; \
4247 } \
4248 case kWord32Atomic##op##Uint8: { \
4249 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
4250 __ movzx_b(eax, eax); \
4251 break; \
4252 } \
4253 case kWord32Atomic##op##Int16: { \
4254 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
4255 __ movsx_w(eax, eax); \
4256 break; \
4257 } \
4258 case kWord32Atomic##op##Uint16: { \
4259 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
4260 __ movzx_w(eax, eax); \
4261 break; \
4262 } \
4263 case kWord32Atomic##op##Word32: { \
4264 ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
4265 break; \
4266 }
4267 ATOMIC_BINOP_CASE(Add, add)
4268 ATOMIC_BINOP_CASE(Sub, sub)
4269 ATOMIC_BINOP_CASE(And, and_)
4270 ATOMIC_BINOP_CASE(Or, or_)
4271 ATOMIC_BINOP_CASE(Xor, xor_)
4272 #undef ATOMIC_BINOP_CASE
4273 #define ATOMIC_BINOP_CASE(op, instr1, instr2) \
4274 case kIA32Word32AtomicPair##op: { \
4275 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
4276 ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
4277 break; \
4278 }
4279 ATOMIC_BINOP_CASE(Add, add, adc)
4280 ATOMIC_BINOP_CASE(And, and_, and_)
4281 ATOMIC_BINOP_CASE(Or, or_, or_)
4282 ATOMIC_BINOP_CASE(Xor, xor_, xor_)
4283 #undef ATOMIC_BINOP_CASE
4284 case kIA32Word32AtomicPairSub: {
4285 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
4286 Label binop;
4287 __ bind(&binop);
4288 // Move memory operand into edx:eax
4289 __ mov(eax, i.MemoryOperand(2));
4290 __ mov(edx, i.NextMemoryOperand(2));
4291 // Save input registers temporarily on the stack.
4292 __ push(ebx);
4293 frame_access_state()->IncreaseSPDelta(1);
4294 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
4295 __ push(i.InputRegister(1));
4296 // Negate input in place
4297 __ neg(ebx);
4298 __ adc(i.InputRegister(1), 0);
4299 __ neg(i.InputRegister(1));
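      // (neg low; adc high, 0; neg high is the standard two's-complement
      // negation of a 64-bit value split across a register pair.)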
4300 // Add memory operand, negated input.
4301 __ add(ebx, eax);
4302 __ adc(i.InputRegister(1), edx);
4303 __ lock();
4304 __ cmpxchg8b(i.MemoryOperand(2));
4305 // Restore input registers
4306 __ pop(i.InputRegister(1));
4307 __ pop(ebx);
4308 frame_access_state()->IncreaseSPDelta(-1);
4309 __ j(not_equal, &binop);
4310 break;
4311 }
4312 case kWord32AtomicLoadInt8:
4313 case kWord32AtomicLoadUint8:
4314 case kWord32AtomicLoadInt16:
4315 case kWord32AtomicLoadUint16:
4316 case kWord32AtomicLoadWord32:
4317 case kWord32AtomicStoreWord8:
4318 case kWord32AtomicStoreWord16:
4319 case kWord32AtomicStoreWord32:
4320 UNREACHABLE(); // Won't be generated by instruction selector.
4321 break;
4322 }
4323 return kSuccess;
4324 } // NOLINT(readability/fn_size)
4325
4326 static Condition FlagsConditionToCondition(FlagsCondition condition) {
4327 switch (condition) {
4328 case kUnorderedEqual:
4329 case kEqual:
4330 return equal;
4331 break;
4332 case kUnorderedNotEqual:
4333 case kNotEqual:
4334 return not_equal;
4335 break;
4336 case kSignedLessThan:
4337 return less;
4338 break;
4339 case kSignedGreaterThanOrEqual:
4340 return greater_equal;
4341 break;
4342 case kSignedLessThanOrEqual:
4343 return less_equal;
4344 break;
4345 case kSignedGreaterThan:
4346 return greater;
4347 break;
4348 case kUnsignedLessThan:
4349 return below;
4350 break;
4351 case kUnsignedGreaterThanOrEqual:
4352 return above_equal;
4353 break;
4354 case kUnsignedLessThanOrEqual:
4355 return below_equal;
4356 break;
4357 case kUnsignedGreaterThan:
4358 return above;
4359 break;
4360 case kOverflow:
4361 return overflow;
4362 break;
4363 case kNotOverflow:
4364 return no_overflow;
4365 break;
4366 default:
4367 UNREACHABLE();
4368 }
4369 }
4370
4371 // Assembles a branch after an instruction.
4372 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4373 Label::Distance flabel_distance =
4374 branch->fallthru ? Label::kNear : Label::kFar;
4375 Label* tlabel = branch->true_label;
4376 Label* flabel = branch->false_label;
4377 if (branch->condition == kUnorderedEqual) {
4378 __ j(parity_even, flabel, flabel_distance);
4379 } else if (branch->condition == kUnorderedNotEqual) {
4380 __ j(parity_even, tlabel);
4381 }
4382 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4383
4384 // Add a jump if not falling through to the next block.
4385 if (!branch->fallthru) __ jmp(flabel);
4386 }
4387
4388 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
4389 Instruction* instr) {
4390 // TODO(860429): Remove remaining poisoning infrastructure on ia32.
4391 UNREACHABLE();
4392 }
4393
4394 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4395 BranchInfo* branch) {
4396 AssembleArchBranch(instr, branch);
4397 }
4398
4399 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4400 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4401 }
4402
4403 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4404 FlagsCondition condition) {
4405 class OutOfLineTrap final : public OutOfLineCode {
4406 public:
4407 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
4408 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
4409
4410 void Generate() final {
4411 IA32OperandConverter i(gen_, instr_);
4412 TrapId trap_id =
4413 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
4414 GenerateCallToTrap(trap_id);
4415 }
4416
4417 private:
4418 void GenerateCallToTrap(TrapId trap_id) {
4419 if (trap_id == TrapId::kInvalid) {
4420 // We cannot test calls to the runtime in cctest/test-run-wasm.
4421 // Therefore we emit a call to C here instead of a call to the runtime.
4422 __ PrepareCallCFunction(0, esi);
4423 __ CallCFunction(
4424 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
4425 __ LeaveFrame(StackFrame::WASM);
4426 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
4427 size_t pop_size =
4428 call_descriptor->StackParameterCount() * kSystemPointerSize;
4429         // Use ecx as a scratch register; we return immediately anyway.
4430 __ Ret(static_cast<int>(pop_size), ecx);
4431 } else {
4432 gen_->AssembleSourcePosition(instr_);
4433 // A direct call to a wasm runtime stub defined in this module.
4434 // Just encode the stub index. This will be patched when the code
4435 // is added to the native module and copied into wasm code space.
4436 __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
4437 ReferenceMap* reference_map =
4438 gen_->zone()->New<ReferenceMap>(gen_->zone());
4439 gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4440 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4441 }
4442 }
4443
4444 Instruction* instr_;
4445 CodeGenerator* gen_;
4446 };
4447 auto ool = zone()->New<OutOfLineTrap>(this, instr);
4448 Label* tlabel = ool->entry();
4449 Label end;
4450 if (condition == kUnorderedEqual) {
4451 __ j(parity_even, &end, Label::kNear);
4452 } else if (condition == kUnorderedNotEqual) {
4453 __ j(parity_even, tlabel);
4454 }
4455 __ j(FlagsConditionToCondition(condition), tlabel);
4456 __ bind(&end);
4457 }
4458
4459 // Assembles boolean materializations after an instruction.
4460 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4461 FlagsCondition condition) {
4462 IA32OperandConverter i(this, instr);
4463 Label done;
4464
4465 // Materialize a full 32-bit 1 or 0 value. The result register is always the
4466 // last output of the instruction.
4467 Label check;
4468 DCHECK_NE(0u, instr->OutputCount());
4469 Register reg = i.OutputRegister(instr->OutputCount() - 1);
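  // Floating-point comparisons set PF when the operands were unordered (a
  // NaN was involved), so the kUnordered* conditions need an explicit parity
  // check before the plain condition test.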
4470 if (condition == kUnorderedEqual) {
4471 __ j(parity_odd, &check, Label::kNear);
4472 __ Move(reg, Immediate(0));
4473 __ jmp(&done, Label::kNear);
4474 } else if (condition == kUnorderedNotEqual) {
4475 __ j(parity_odd, &check, Label::kNear);
4476 __ mov(reg, Immediate(1));
4477 __ jmp(&done, Label::kNear);
4478 }
4479 Condition cc = FlagsConditionToCondition(condition);
4480
4481 __ bind(&check);
4482 if (reg.is_byte_register()) {
4483 // setcc for byte registers (al, bl, cl, dl).
4484 __ setcc(cc, reg);
4485 __ movzx_b(reg, reg);
4486 } else {
4487 // Emit a branch to set a register to either 1 or 0.
4488 Label set;
4489 __ j(cc, &set, Label::kNear);
4490 __ Move(reg, Immediate(0));
4491 __ jmp(&done, Label::kNear);
4492 __ bind(&set);
4493 __ mov(reg, Immediate(1));
4494 }
4495 __ bind(&done);
4496 }
4497
4498 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4499 IA32OperandConverter i(this, instr);
4500 Register input = i.InputRegister(0);
4501 std::vector<std::pair<int32_t, Label*>> cases;
4502 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4503 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4504 }
4505 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4506 cases.data() + cases.size());
4507 }
4508
4509 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4510 IA32OperandConverter i(this, instr);
4511 Register input = i.InputRegister(0);
4512 size_t const case_count = instr->InputCount() - 2;
4513 Label** cases = zone()->NewArray<Label*>(case_count);
4514 for (size_t index = 0; index < case_count; ++index) {
4515 cases[index] = GetLabel(i.InputRpo(index + 2));
4516 }
4517 Label* const table = AddJumpTable(cases, case_count);
4518 __ cmp(input, Immediate(case_count));
4519 __ j(above_equal, GetLabel(i.InputRpo(1)));
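  // Note that the unsigned above_equal bound check also routes negative
  // inputs to the default label with a single comparison.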
4520 __ jmp(Operand::JumpTable(input, times_system_pointer_size, table));
4521 }
4522
4523 // The calling convention for JSFunctions on IA32 passes arguments on the
4524 // stack and the JSFunction and context in EDI and ESI, respectively; the
4525 // steps of the call thus look as follows:
4526
4527 // --{ before the call instruction }--------------------------------------------
4528 // | caller frame |
4529 // ^ esp ^ ebp
4530
4531 // --{ push arguments and set up ESI, EDI }-------------------------------------
4532 // | args + receiver | caller frame |
4533 // ^ esp ^ ebp
4534 // [edi = JSFunction, esi = context]
4535
4536 // --{ call [edi + kCodeEntryOffset] }------------------------------------------
4537 // | RET | args + receiver | caller frame |
4538 // ^ esp ^ ebp
4539
4540 // =={ prologue of called function }============================================
4541 // --{ push ebp }---------------------------------------------------------------
4542 // | FP | RET | args + receiver | caller frame |
4543 // ^ esp ^ ebp
4544
4545 // --{ mov ebp, esp }-----------------------------------------------------------
4546 // | FP | RET | args + receiver | caller frame |
4547 // ^ ebp,esp
4548
4549 // --{ push esi }---------------------------------------------------------------
4550 // | CTX | FP | RET | args + receiver | caller frame |
4551 // ^esp ^ ebp
4552
4553 // --{ push edi }---------------------------------------------------------------
4554 // | FNC | CTX | FP | RET | args + receiver | caller frame |
4555 // ^esp ^ ebp
4556
4557 // --{ subi esp, #N }-----------------------------------------------------------
4558 // | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
4559 // ^esp ^ ebp
4560
4561 // =={ body of called function }================================================
4562
4563 // =={ epilogue of called function }============================================
4564 // --{ mov esp, ebp }-----------------------------------------------------------
4565 // | FP | RET | args + receiver | caller frame |
4566 // ^ esp,ebp
4567
4568 // --{ pop ebp }-----------------------------------------------------------
4569 // | | RET | args + receiver | caller frame |
4570 // ^ esp ^ ebp
4571
4572 // --{ ret #A+1 }-----------------------------------------------------------
4573 // | | caller frame |
4574 // ^ esp ^ ebp
4575
4576 // Runtime function calls are accomplished by doing a stub call to the
4577 // CEntry (a real code object). On IA32 it passes arguments on the
4578 // stack, the number of arguments in EAX, the address of the runtime function
4579 // in EBX, and the context in ESI.
4580
4581 // --{ before the call instruction }--------------------------------------------
4582 // | caller frame |
4583 // ^ esp ^ ebp
4584
4585 // --{ push arguments and set up EAX, EBX, and ESI }----------------------------
4586 // | args + receiver | caller frame |
4587 // ^ esp ^ ebp
4588 // [eax = #args, ebx = runtime function, esi = context]
4589
4590 // --{ call #CEntry }-----------------------------------------------------------
4591 // | RET | args + receiver | caller frame |
4592 // ^ esp ^ ebp
4593
4594 // =={ body of runtime function }===============================================
4595
4596 // --{ runtime returns }--------------------------------------------------------
4597 // | caller frame |
4598 // ^ esp ^ ebp
4599
4600 // Other custom linkages (e.g. for calling directly into and out of C++) may
4601 // need to save callee-saved registers on the stack, which is done in the
4602 // function prologue of generated code.
4603
4604 // --{ before the call instruction }--------------------------------------------
4605 // | caller frame |
4606 // ^ esp ^ ebp
4607
4608 // --{ set up arguments in registers and on the stack }-------------------------
4609 // | args | caller frame |
4610 // ^ esp ^ ebp
4611 // [r0 = arg0, r1 = arg1, ...]
4612
4613 // --{ call code }--------------------------------------------------------------
4614 // | RET | args | caller frame |
4615 // ^ esp ^ ebp
4616
4617 // =={ prologue of called function }============================================
4618 // --{ push ebp }---------------------------------------------------------------
4619 // | FP | RET | args | caller frame |
4620 // ^ esp ^ ebp
4621
4622 // --{ mov ebp, esp }-----------------------------------------------------------
4623 // | FP | RET | args | caller frame |
4624 // ^ ebp,esp
4625
4626 // --{ save registers }---------------------------------------------------------
4627 // | regs | FP | RET | args | caller frame |
4628 // ^ esp ^ ebp
4629
4630 // --{ subi esp, #N }-----------------------------------------------------------
4631 // | callee frame | regs | FP | RET | args | caller frame |
4632 // ^esp ^ ebp
4633
4634 // =={ body of called function }================================================
4635
4636 // =={ epilogue of called function }============================================
4637 // --{ restore registers }------------------------------------------------------
4638 // | regs | FP | RET | args | caller frame |
4639 // ^ esp ^ ebp
4640
4641 // --{ mov esp, ebp }-----------------------------------------------------------
4642 // | FP | RET | args | caller frame |
4643 // ^ esp,ebp
4644
4645 // --{ pop ebp }----------------------------------------------------------------
4646 // | RET | args | caller frame |
4647 // ^ esp ^ ebp
4648
4649 void CodeGenerator::FinishFrame(Frame* frame) {
4650 auto call_descriptor = linkage()->GetIncomingDescriptor();
4651 const RegList saves = call_descriptor->CalleeSavedRegisters();
4652 if (saves != 0) { // Save callee-saved registers.
4653 DCHECK(!info()->is_osr());
4654 int pushed = 0;
4655 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4656 if (!((1 << i) & saves)) continue;
4657 ++pushed;
4658 }
4659 frame->AllocateSavedCalleeRegisterSlots(pushed);
4660 }
4661 }
4662
4663 void CodeGenerator::AssembleConstructFrame() {
4664 auto call_descriptor = linkage()->GetIncomingDescriptor();
4665 if (frame_access_state()->has_frame()) {
4666 if (call_descriptor->IsCFunctionCall()) {
4667 __ push(ebp);
4668 __ mov(ebp, esp);
4669 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4670 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4671 // Reserve stack space for saving the c_entry_fp later.
4672 __ AllocateStackSpace(kSystemPointerSize);
4673 }
4674 } else if (call_descriptor->IsJSFunctionCall()) {
4675 __ Prologue();
4676 } else {
4677 __ StubPrologue(info()->GetOutputStackFrameType());
4678 if (call_descriptor->IsWasmFunctionCall()) {
4679 __ push(kWasmInstanceRegister);
4680 } else if (call_descriptor->IsWasmImportWrapper() ||
4681 call_descriptor->IsWasmCapiFunction()) {
4682 // Wasm import wrappers are passed a tuple in the place of the instance.
4683 // Unpack the tuple into the instance and the target callable.
4684 // This must be done here in the codegen because it cannot be expressed
4685 // properly in the graph.
4686 __ mov(kJSFunctionRegister,
4687 Operand(kWasmInstanceRegister,
4688 Tuple2::kValue2Offset - kHeapObjectTag));
4689 __ mov(kWasmInstanceRegister,
4690 Operand(kWasmInstanceRegister,
4691 Tuple2::kValue1Offset - kHeapObjectTag));
4692 __ push(kWasmInstanceRegister);
4693 if (call_descriptor->IsWasmCapiFunction()) {
4694 // Reserve space for saving the PC later.
4695 __ AllocateStackSpace(kSystemPointerSize);
4696 }
4697 }
4698 }
4699 }
4700
4701 int required_slots =
4702 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4703
4704 if (info()->is_osr()) {
4705 // TurboFan OSR-compiled functions cannot be entered directly.
4706 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4707
4708 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4709 // frame is still on the stack. Optimized code uses OSR values directly from
4710 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4711 // remaining stack slots.
4712 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4713 osr_pc_offset_ = __ pc_offset();
4714 required_slots -= osr_helper()->UnoptimizedFrameSlots();
4715 }
4716
4717 const RegList saves = call_descriptor->CalleeSavedRegisters();
4718 if (required_slots > 0) {
4719 DCHECK(frame_access_state()->has_frame());
4720 if (info()->IsWasm() && required_slots > 128) {
4721 // For WebAssembly functions with big frames we have to do the stack
4722 // overflow check before we construct the frame. Otherwise we may not
4723 // have enough space on the stack to call the runtime for the stack
4724 // overflow.
4725 Label done;
4726
4727 // If the frame is bigger than the stack, we throw the stack overflow
4728 // exception unconditionally. Thereby we can avoid the integer overflow
4729 // check in the condition code.
4730 if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
4731 Register scratch = esi;
4732 __ push(scratch);
4733 __ mov(scratch,
4734 FieldOperand(kWasmInstanceRegister,
4735 WasmInstanceObject::kRealStackLimitAddressOffset));
4736 __ mov(scratch, Operand(scratch, 0));
4737 __ add(scratch, Immediate(required_slots * kSystemPointerSize));
4738 __ cmp(esp, scratch);
4739 __ pop(scratch);
4740 __ j(above_equal, &done, Label::kNear);
4741 }
4742
4743 __ wasm_call(wasm::WasmCode::kWasmStackOverflow,
4744 RelocInfo::WASM_STUB_CALL);
4745 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4746 RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4747 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4748 __ bind(&done);
4749 }
4750
4751 // Skip callee-saved and return slots, which are created below.
4752 required_slots -= base::bits::CountPopulation(saves);
4753 required_slots -= frame()->GetReturnSlotCount();
4754 if (required_slots > 0) {
4755 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4756 }
4757 }
4758
4759 if (saves != 0) { // Save callee-saved registers.
4760 DCHECK(!info()->is_osr());
4761 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4762 if (((1 << i) & saves)) __ push(Register::from_code(i));
4763 }
4764 }
4765
4766 // Allocate return slots (located after callee-saved).
4767 if (frame()->GetReturnSlotCount() > 0) {
4768 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4769 }
4770 }
4771
4772 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4773 auto call_descriptor = linkage()->GetIncomingDescriptor();
4774
4775 const RegList saves = call_descriptor->CalleeSavedRegisters();
4776 // Restore registers.
4777 if (saves != 0) {
4778 const int returns = frame()->GetReturnSlotCount();
4779 if (returns != 0) {
4780 __ add(esp, Immediate(returns * kSystemPointerSize));
4781 }
4782 for (int i = 0; i < Register::kNumRegisters; i++) {
4783 if (!((1 << i) & saves)) continue;
4784 __ pop(Register::from_code(i));
4785 }
4786 }
4787
4788 // We might need ecx and edx for scratch.
4789 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
4790 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4791 IA32OperandConverter g(this, nullptr);
4792 int parameter_count =
4793 static_cast<int>(call_descriptor->StackParameterCount());
4794
4795   // {additional_pop_count} is only greater than zero if {parameter_count == 0}.
4796 // Check RawMachineAssembler::PopAndReturn.
4797 if (parameter_count != 0) {
4798 if (additional_pop_count->IsImmediate()) {
4799 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4800 } else if (__ emit_debug_code()) {
4801 __ cmp(g.ToRegister(additional_pop_count), Immediate(0));
4802 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4803 }
4804 }
4805
4806 Register argc_reg = ecx;
4807 #ifdef V8_NO_ARGUMENTS_ADAPTOR
4808   // Functions with JS linkage have at least one parameter (the receiver).
4809   // If {parameter_count} == 0, this is a builtin with
4810   // kDontAdaptArgumentsSentinel, which takes care of popping its JS
4811   // arguments itself.
4812 const bool drop_jsargs = frame_access_state()->has_frame() &&
4813 call_descriptor->IsJSFunctionCall() &&
4814 parameter_count != 0;
4815 #else
4816 const bool drop_jsargs = false;
4817 #endif
4818 if (call_descriptor->IsCFunctionCall()) {
4819 AssembleDeconstructFrame();
4820 } else if (frame_access_state()->has_frame()) {
4821 // Canonicalize JSFunction return sites for now if they always have the same
4822 // number of return args.
4823 if (additional_pop_count->IsImmediate() &&
4824 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4825 if (return_label_.is_bound()) {
4826 __ jmp(&return_label_);
4827 return;
4828 } else {
4829 __ bind(&return_label_);
4830 }
4831 }
4832 if (drop_jsargs) {
4833 // Get the actual argument count.
4834 __ mov(argc_reg, Operand(ebp, StandardFrameConstants::kArgCOffset));
4835 }
4836 AssembleDeconstructFrame();
4837 }
4838
4839 if (drop_jsargs) {
4840 // We must pop all arguments from the stack (including the receiver). This
4841 // number of arguments is given by max(1 + argc_reg, parameter_count).
4842 int parameter_count_without_receiver =
4843 parameter_count - 1; // Exclude the receiver to simplify the
4844 // computation. We'll account for it at the end.
4845 Label mismatch_return;
4846 Register scratch_reg = edx;
4847 DCHECK_NE(argc_reg, scratch_reg);
4848 __ cmp(argc_reg, Immediate(parameter_count_without_receiver));
4849 __ j(greater, &mismatch_return, Label::kNear);
4850 __ Ret(parameter_count * kSystemPointerSize, scratch_reg);
4851 __ bind(&mismatch_return);
4852 __ PopReturnAddressTo(scratch_reg);
4853 __ lea(esp, Operand(esp, argc_reg, times_system_pointer_size,
4854 kSystemPointerSize)); // Also pop the receiver.
4855 // We use a return instead of a jump for better return address prediction.
4856 __ PushReturnAddressFrom(scratch_reg);
4857 __ Ret();
4858 } else if (additional_pop_count->IsImmediate()) {
4859 Register scratch_reg = ecx;
4860 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4861 size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
4862 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    __ Ret(static_cast<int>(pop_size), scratch_reg);
  } else {
    Register pop_reg = g.ToRegister(additional_pop_count);
    Register scratch_reg = pop_reg == ecx ? edx : ecx;
    int pop_size = static_cast<int>(parameter_count * kSystemPointerSize);
    __ PopReturnAddressTo(scratch_reg);
    __ lea(esp, Operand(esp, pop_reg, times_system_pointer_size, pop_size));
    __ PushReturnAddressFrom(scratch_reg);
    __ Ret();
  }
}

void CodeGenerator::FinishCode() {}

void CodeGenerator::PrepareForDeoptimizationExits(
    ZoneDeque<DeoptimizationExit*>* exits) {}

void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ mov(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ mov(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
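        // IA-32 has no memory-to-memory mov, so bounce the word through the
        // stack with a push/pop pair.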
        __ push(src);
        __ pop(dst);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, src);
          __ movss(dst, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, src);
          __ movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, src);
          __ movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        Register dst = g.ToRegister(destination);
        if (src.type() == Constant::kHeapObject) {
          __ Move(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
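          // ToFloat32AsInt() is the raw bit pattern of the float; Move()
          // materializes those bits in the XMM register without any runtime
          // float decoding.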
          __ Move(dst, src.ToFloat32AsInt());
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        __ Move(dst, g.ToImmediate(source));
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ Move(dst, Immediate(src.ToFloat32AsInt()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
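          // IA-32 has no 64-bit immediate store, so write the constant as two
          // 32-bit halves into adjacent stack words.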
          uint64_t constant_value = src.ToFloat64().AsUint64();
          uint32_t lower = static_cast<uint32_t>(constant_value);
          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
          Operand dst0 = dst;
          Operand dst1 = g.ToOperand(destination, kSystemPointerSize);
          __ Move(dst0, Immediate(lower));
          __ Move(dst1, Immediate(upper));
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
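        // Swap the two registers through the stack: push one, overwrite it
        // with the other, then pop the saved value into the other.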
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ push(src);
        __ mov(src, dst);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
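        // Three-move swap through the scratch XMM register; movaps copies the
        // full 128 bits, so one sequence covers every FP representation.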
        __ movaps(kScratchDoubleReg, src);
        __ movaps(src, dst);
        __ movaps(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
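        // Pushing {src} moves esp, so the destination operand is re-resolved
        // after each SP-delta change to keep esp-relative slots correct.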
        Register src = g.ToRegister(source);
        __ push(src);
        frame_access_state()->IncreaseSPDelta(1);
        Operand dst = g.ToOperand(destination);
        __ mov(src, dst);
        frame_access_state()->IncreaseSPDelta(-1);
        dst = g.ToOperand(destination);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst);
          __ movss(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst);
          __ movsd(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, dst);
          __ movups(dst, src);
          __ movups(src, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      if (source->IsStackSlot()) {
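        // Swap two GP stack slots with two pushes and two pops, re-resolving
        // the operands around each esp adjustment.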
        Operand dst1 = g.ToOperand(destination);
        __ push(dst1);
        frame_access_state()->IncreaseSPDelta(1);
        Operand src1 = g.ToOperand(source);
        __ push(src1);
        Operand dst2 = g.ToOperand(destination);
        __ pop(dst2);
        frame_access_state()->IncreaseSPDelta(-1);
        Operand src2 = g.ToOperand(source);
        __ pop(src2);
      } else {
        DCHECK(source->IsFPStackSlot());
        Operand src0 = g.ToOperand(source);
        Operand dst0 = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ movss(src0, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ movsd(src0, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
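          // 16-byte swap: stash dst in the scratch XMM register, copy src
          // over dst one 32-bit word at a time via push/pop, then store the
          // stashed value back over src.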
          __ movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kSystemPointerSize));
          __ pop(g.ToOperand(destination, kSystemPointerSize));
          __ push(g.ToOperand(source, 2 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 2 * kSystemPointerSize));
          __ push(g.ToOperand(source, 3 * kSystemPointerSize));
          __ pop(g.ToOperand(destination, 3 * kSystemPointerSize));
          __ movups(src0, kScratchDoubleReg);
        }
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
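  // Each entry is a 32-bit reference to the target label, emitted with dd();
  // the assembler records the relocation info needed to fix the entries up
  // if the code object moves.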
  for (size_t index = 0; index < target_count; ++index) {
    __ dd(targets[index]);
  }
}

#undef __
#undef kScratchDoubleReg
#undef ASSEMBLE_COMPARE
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_BINOP
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_I64ATOMIC_BINOP
#undef ASSEMBLE_MOVX
#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
#undef ASSEMBLE_SIMD_IMM_SHUFFLE
#undef ASSEMBLE_SIMD_ALL_TRUE
#undef ASSEMBLE_SIMD_SHIFT

}  // namespace compiler
}  // namespace internal
}  // namespace v8