1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <limits>
6
7 #include "src/base/overflowing-math.h"
8 #include "src/codegen/assembler.h"
9 #include "src/codegen/cpu-features.h"
10 #include "src/codegen/external-reference.h"
11 #include "src/codegen/macro-assembler.h"
12 #include "src/codegen/optimized-compilation-info.h"
13 #include "src/codegen/x64/assembler-x64.h"
14 #include "src/codegen/x64/register-x64.h"
15 #include "src/common/globals.h"
16 #include "src/compiler/backend/code-generator-impl.h"
17 #include "src/compiler/backend/code-generator.h"
18 #include "src/compiler/backend/gap-resolver.h"
19 #include "src/compiler/backend/instruction-codes.h"
20 #include "src/compiler/node-matchers.h"
21 #include "src/compiler/osr.h"
22 #include "src/heap/memory-chunk.h"
23 #include "src/objects/code-kind.h"
24 #include "src/objects/smi.h"
25
26 #if V8_ENABLE_WEBASSEMBLY
27 #include "src/wasm/wasm-code-manager.h"
28 #include "src/wasm/wasm-objects.h"
29 #endif // V8_ENABLE_WEBASSEMBLY
30
31 namespace v8 {
32 namespace internal {
33 namespace compiler {
34
35 #define __ tasm()->
36
37 // Adds X64-specific methods for decoding operands.
38 class X64OperandConverter : public InstructionOperandConverter {
39 public:
40 X64OperandConverter(CodeGenerator* gen, Instruction* instr)
41 : InstructionOperandConverter(gen, instr) {}
42
43 Immediate InputImmediate(size_t index) {
44 return ToImmediate(instr_->InputAt(index));
45 }
46
47 Operand InputOperand(size_t index, int extra = 0) {
48 return ToOperand(instr_->InputAt(index), extra);
49 }
50
51 Operand OutputOperand() { return ToOperand(instr_->Output()); }
52
53 Immediate ToImmediate(InstructionOperand* operand) {
54 Constant constant = ToConstant(operand);
55 if (constant.type() == Constant::kFloat64) {
56 DCHECK_EQ(0, constant.ToFloat64().AsUint64());
57 return Immediate(0);
58 }
59 if (RelocInfo::IsWasmReference(constant.rmode())) {
60 return Immediate(constant.ToInt32(), constant.rmode());
61 }
62 return Immediate(constant.ToInt32());
63 }
64
65 Operand ToOperand(InstructionOperand* op, int extra = 0) {
66 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
67 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
68 }
69
70 Operand SlotToOperand(int slot_index, int extra = 0) {
71 FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
72 return Operand(offset.from_stack_pointer() ? rsp : rbp,
73 offset.offset() + extra);
74 }
75
76 static size_t NextOffset(size_t* offset) {
77 size_t i = *offset;
78 (*offset)++;
79 return i;
80 }
81
82 static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
83 STATIC_ASSERT(0 == static_cast<int>(times_1));
84 STATIC_ASSERT(1 == static_cast<int>(times_2));
85 STATIC_ASSERT(2 == static_cast<int>(times_4));
86 STATIC_ASSERT(3 == static_cast<int>(times_8));
87 int scale = static_cast<int>(mode - one);
88 DCHECK(scale >= 0 && scale < 4);
89 return static_cast<ScaleFactor>(scale);
90 }
91
92 Operand MemoryOperand(size_t* offset) {
93 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
94 switch (mode) {
95 case kMode_MR: {
96 Register base = InputRegister(NextOffset(offset));
97 int32_t disp = 0;
98 return Operand(base, disp);
99 }
100 case kMode_MRI: {
101 Register base = InputRegister(NextOffset(offset));
102 int32_t disp = InputInt32(NextOffset(offset));
103 return Operand(base, disp);
104 }
105 case kMode_MR1:
106 case kMode_MR2:
107 case kMode_MR4:
108 case kMode_MR8: {
109 Register base = InputRegister(NextOffset(offset));
110 Register index = InputRegister(NextOffset(offset));
111 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
112 int32_t disp = 0;
113 return Operand(base, index, scale, disp);
114 }
115 case kMode_MR1I:
116 case kMode_MR2I:
117 case kMode_MR4I:
118 case kMode_MR8I: {
119 Register base = InputRegister(NextOffset(offset));
120 Register index = InputRegister(NextOffset(offset));
121 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
122 int32_t disp = InputInt32(NextOffset(offset));
123 return Operand(base, index, scale, disp);
124 }
125 case kMode_M1: {
126 Register base = InputRegister(NextOffset(offset));
127 int32_t disp = 0;
128 return Operand(base, disp);
129 }
130 case kMode_M2:
131 UNREACHABLE(); // Should use kModeMR with more compact encoding instead
132 case kMode_M4:
133 case kMode_M8: {
134 Register index = InputRegister(NextOffset(offset));
135 ScaleFactor scale = ScaleFor(kMode_M1, mode);
136 int32_t disp = 0;
137 return Operand(index, scale, disp);
138 }
139 case kMode_M1I:
140 case kMode_M2I:
141 case kMode_M4I:
142 case kMode_M8I: {
143 Register index = InputRegister(NextOffset(offset));
144 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
145 int32_t disp = InputInt32(NextOffset(offset));
146 return Operand(index, scale, disp);
147 }
148 case kMode_Root: {
149 Register base = kRootRegister;
150 int32_t disp = InputInt32(NextOffset(offset));
151 return Operand(base, disp);
152 }
153 case kMode_None:
154 UNREACHABLE();
155 }
156 UNREACHABLE();
157 }
158
159 Operand MemoryOperand(size_t first_input = 0) {
160 return MemoryOperand(&first_input);
161 }
162 };
163
164 namespace {
165
166 bool HasAddressingMode(Instruction* instr) {
167 return instr->addressing_mode() != kMode_None;
168 }
169
170 bool HasImmediateInput(Instruction* instr, size_t index) {
171 return instr->InputAt(index)->IsImmediate();
172 }
173
174 bool HasRegisterInput(Instruction* instr, size_t index) {
175 return instr->InputAt(index)->IsRegister();
176 }
177
178 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
179 public:
180 OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
181 : OutOfLineCode(gen), result_(result) {}
182
183 void Generate() final {
184 __ Xorps(result_, result_);
185 __ Divss(result_, result_);
186 }
187
188 private:
189 XMMRegister const result_;
190 };
191
192 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
193 public:
194 OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
195 : OutOfLineCode(gen), result_(result) {}
196
197 void Generate() final {
198 __ Xorpd(result_, result_);
199 __ Divsd(result_, result_);
200 }
201
202 private:
203 XMMRegister const result_;
204 };
205
206 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
207 public:
208 OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
209 XMMRegister input, StubCallMode stub_mode,
210 UnwindingInfoWriter* unwinding_info_writer)
211 : OutOfLineCode(gen),
212 result_(result),
213 input_(input),
214 #if V8_ENABLE_WEBASSEMBLY
215 stub_mode_(stub_mode),
216 #endif // V8_ENABLE_WEBASSEMBLY
217 unwinding_info_writer_(unwinding_info_writer),
218 isolate_(gen->isolate()),
219 zone_(gen->zone()) {
220 }
221
222 void Generate() final {
223 __ AllocateStackSpace(kDoubleSize);
224 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
225 kDoubleSize);
226 __ Movsd(MemOperand(rsp, 0), input_);
227 #if V8_ENABLE_WEBASSEMBLY
228 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
229 // A direct call to a wasm runtime stub defined in this module.
230 // Just encode the stub index. This will be patched when the code
231 // is added to the native module and copied into wasm code space.
232 __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
233 #else
234 // For balance.
235 if (false) {
236 #endif // V8_ENABLE_WEBASSEMBLY
237 } else if (tasm()->options().inline_offheap_trampolines) {
238 // With embedded builtins we do not need the isolate here. This allows
239 // the call to be generated asynchronously.
240 __ CallBuiltin(Builtin::kDoubleToI);
241 } else {
242 __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
243 }
244 __ movl(result_, MemOperand(rsp, 0));
245 __ addq(rsp, Immediate(kDoubleSize));
246 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
247 -kDoubleSize);
248 }
249
250 private:
251 Register const result_;
252 XMMRegister const input_;
253 #if V8_ENABLE_WEBASSEMBLY
254 StubCallMode stub_mode_;
255 #endif // V8_ENABLE_WEBASSEMBLY
256 UnwindingInfoWriter* const unwinding_info_writer_;
257 Isolate* isolate_;
258 Zone* zone_;
259 };
260
261 class OutOfLineRecordWrite final : public OutOfLineCode {
262 public:
263 OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
264 Register value, Register scratch0, Register scratch1,
265 RecordWriteMode mode, StubCallMode stub_mode)
266 : OutOfLineCode(gen),
267 object_(object),
268 operand_(operand),
269 value_(value),
270 scratch0_(scratch0),
271 scratch1_(scratch1),
272 mode_(mode),
273 #if V8_ENABLE_WEBASSEMBLY
274 stub_mode_(stub_mode),
275 #endif // V8_ENABLE_WEBASSEMBLY
276 zone_(gen->zone()) {
277 DCHECK(!AreAliased(object, scratch0, scratch1));
278 DCHECK(!AreAliased(value, scratch0, scratch1));
279 }
280
281 void Generate() final {
282 if (COMPRESS_POINTERS_BOOL) {
283 __ DecompressTaggedPointer(value_, value_);
284 }
285 __ CheckPageFlag(value_, scratch0_,
286 MemoryChunk::kPointersToHereAreInterestingMask, zero,
287 exit());
288 __ leaq(scratch1_, operand_);
289
290 RememberedSetAction const remembered_set_action =
291 mode_ > RecordWriteMode::kValueIsMap ||
292 FLAG_use_full_record_write_builtin
293 ? RememberedSetAction::kEmit
294 : RememberedSetAction::kOmit;
295 SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
296 ? SaveFPRegsMode::kSave
297 : SaveFPRegsMode::kIgnore;
298
299 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
300 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
301 #if V8_ENABLE_WEBASSEMBLY
302 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
303 // A direct call to a wasm runtime stub defined in this module.
304 // Just encode the stub index. This will be patched when the code
305 // is added to the native module and copied into wasm code space.
306 __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
307 remembered_set_action, save_fp_mode,
308 StubCallMode::kCallWasmRuntimeStub);
309 #endif // V8_ENABLE_WEBASSEMBLY
310 } else {
311 __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
312 remembered_set_action, save_fp_mode);
313 }
314 }
315
316 private:
317 Register const object_;
318 Operand const operand_;
319 Register const value_;
320 Register const scratch0_;
321 Register const scratch1_;
322 RecordWriteMode const mode_;
323 #if V8_ENABLE_WEBASSEMBLY
324 StubCallMode const stub_mode_;
325 #endif // V8_ENABLE_WEBASSEMBLY
326 Zone* zone_;
327 };
328
329 template <std::memory_order order>
330 void EmitStore(TurboAssembler* tasm, Operand operand, Register value,
331 MachineRepresentation rep) {
332 if (order == std::memory_order_relaxed) {
333 switch (rep) {
334 case MachineRepresentation::kWord8:
335 tasm->movb(operand, value);
336 break;
337 case MachineRepresentation::kWord16:
338 tasm->movw(operand, value);
339 break;
340 case MachineRepresentation::kWord32:
341 tasm->movl(operand, value);
342 break;
343 case MachineRepresentation::kWord64:
344 tasm->movq(operand, value);
345 break;
346 case MachineRepresentation::kTagged:
347 tasm->StoreTaggedField(operand, value);
348 break;
349 case MachineRepresentation::kSandboxedPointer:
350 tasm->StoreSandboxedPointerField(operand, value);
351 break;
352 default:
353 UNREACHABLE();
354 }
355 return;
356 }
357
358 DCHECK_EQ(order, std::memory_order_seq_cst);
359 switch (rep) {
360 case MachineRepresentation::kWord8:
361 tasm->movq(kScratchRegister, value);
362 tasm->xchgb(kScratchRegister, operand);
363 break;
364 case MachineRepresentation::kWord16:
365 tasm->movq(kScratchRegister, value);
366 tasm->xchgw(kScratchRegister, operand);
367 break;
368 case MachineRepresentation::kWord32:
369 tasm->movq(kScratchRegister, value);
370 tasm->xchgl(kScratchRegister, operand);
371 break;
372 case MachineRepresentation::kWord64:
373 tasm->movq(kScratchRegister, value);
374 tasm->xchgq(kScratchRegister, operand);
375 break;
376 case MachineRepresentation::kTagged:
377 tasm->AtomicStoreTaggedField(operand, value);
378 break;
379 default:
380 UNREACHABLE();
381 }
382 }
383
384 template <std::memory_order order>
385 void EmitStore(TurboAssembler* tasm, Operand operand, Immediate value,
386 MachineRepresentation rep);
387
388 template <>
389 void EmitStore<std::memory_order_relaxed>(TurboAssembler* tasm, Operand operand,
390 Immediate value,
391 MachineRepresentation rep) {
392 switch (rep) {
393 case MachineRepresentation::kWord8:
394 tasm->movb(operand, value);
395 break;
396 case MachineRepresentation::kWord16:
397 tasm->movw(operand, value);
398 break;
399 case MachineRepresentation::kWord32:
400 tasm->movl(operand, value);
401 break;
402 case MachineRepresentation::kWord64:
403 tasm->movq(operand, value);
404 break;
405 case MachineRepresentation::kTagged:
406 tasm->StoreTaggedField(operand, value);
407 break;
408 default:
409 UNREACHABLE();
410 }
411 }
412
413 #ifdef V8_IS_TSAN
414 void EmitMemoryProbeForTrapHandlerIfNeeded(TurboAssembler* tasm,
415 Register scratch, Operand operand,
416 StubCallMode mode, int size) {
417 #if V8_ENABLE_WEBASSEMBLY && V8_TRAP_HANDLER_SUPPORTED
418 // The wasm OOB trap handler needs to be able to look up the faulting
419 // instruction pointer to handle the SIGSEGV raised by an OOB access. It
420 // will not handle SIGSEGVs raised by the TSAN store helpers. Emit a
421 // redundant load here to give the trap handler a chance to handle any
422 // OOB SIGSEGVs.
423 if (trap_handler::IsTrapHandlerEnabled() &&
424 mode == StubCallMode::kCallWasmRuntimeStub) {
425 switch (size) {
426 case kInt8Size:
427 tasm->movb(scratch, operand);
428 break;
429 case kInt16Size:
430 tasm->movw(scratch, operand);
431 break;
432 case kInt32Size:
433 tasm->movl(scratch, operand);
434 break;
435 case kInt64Size:
436 tasm->movq(scratch, operand);
437 break;
438 default:
439 UNREACHABLE();
440 }
441 }
442 #endif
443 }
444
445 class OutOfLineTSANStore : public OutOfLineCode {
446 public:
447 OutOfLineTSANStore(CodeGenerator* gen, Operand operand, Register value,
448 Register scratch0, StubCallMode stub_mode, int size,
449 std::memory_order order)
450 : OutOfLineCode(gen),
451 operand_(operand),
452 value_(value),
453 scratch0_(scratch0),
454 #if V8_ENABLE_WEBASSEMBLY
455 stub_mode_(stub_mode),
456 #endif // V8_ENABLE_WEBASSEMBLY
457 size_(size),
458 memory_order_(order),
459 zone_(gen->zone()) {
460 DCHECK(!AreAliased(value, scratch0));
461 }
462
463 void Generate() final {
464 const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
465 ? SaveFPRegsMode::kSave
466 : SaveFPRegsMode::kIgnore;
467 __ leaq(scratch0_, operand_);
468
469 #if V8_ENABLE_WEBASSEMBLY
470 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
471 // A direct call to a wasm runtime stub defined in this module.
472 // Just encode the stub index. This will be patched when the code
473 // is added to the native module and copied into wasm code space.
474 tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
475 StubCallMode::kCallWasmRuntimeStub,
476 memory_order_);
477 return;
478 }
479 #endif // V8_ENABLE_WEBASSEMBLY
480
481 tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
482 StubCallMode::kCallBuiltinPointer, memory_order_);
483 }
484
485 private:
486 Operand const operand_;
487 Register const value_;
488 Register const scratch0_;
489 #if V8_ENABLE_WEBASSEMBLY
490 StubCallMode const stub_mode_;
491 #endif // V8_ENABLE_WEBASSEMBLY
492 int size_;
493 const std::memory_order memory_order_;
494 Zone* zone_;
495 };
496
497 void EmitTSANStoreOOL(Zone* zone, CodeGenerator* codegen, TurboAssembler* tasm,
498 Operand operand, Register value_reg,
499 X64OperandConverter& i, StubCallMode mode, int size,
500 std::memory_order order) {
501 // The FOR_TESTING code doesn't initialize the root register. We can't call
502 // the TSAN builtin since we need to load the external reference through the
503 // root register.
504 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
505 // path. It is not crucial, but it would be nice to remove this restriction.
506 DCHECK_NE(codegen->code_kind(), CodeKind::FOR_TESTING);
507
508 Register scratch0 = i.TempRegister(0);
509 auto tsan_ool = zone->New<OutOfLineTSANStore>(codegen, operand, value_reg,
510 scratch0, mode, size, order);
511 tasm->jmp(tsan_ool->entry());
512 tasm->bind(tsan_ool->exit());
513 }
514
515 template <std::memory_order order>
516 Register GetTSANValueRegister(TurboAssembler* tasm, Register value,
517 X64OperandConverter& i,
518 MachineRepresentation rep) {
519 if (rep == MachineRepresentation::kSandboxedPointer) {
520 // SandboxedPointers need to be encoded.
521 Register value_reg = i.TempRegister(1);
522 tasm->movq(value_reg, value);
523 tasm->EncodeSandboxedPointer(value_reg);
524 return value_reg;
525 }
526 return value;
527 }
528
529 template <std::memory_order order>
530 Register GetTSANValueRegister(TurboAssembler* tasm, Immediate value,
531 X64OperandConverter& i,
532 MachineRepresentation rep);
533
534 template <>
535 Register GetTSANValueRegister<std::memory_order_relaxed>(
536 TurboAssembler* tasm, Immediate value, X64OperandConverter& i,
537 MachineRepresentation rep) {
538 Register value_reg = i.TempRegister(1);
539 tasm->movq(value_reg, value);
540 if (rep == MachineRepresentation::kSandboxedPointer) {
541 // SandboxedPointers need to be encoded.
542 tasm->EncodeSandboxedPointer(value_reg);
543 }
544 return value_reg;
545 }
546
547 template <std::memory_order order, typename ValueT>
548 void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
549 TurboAssembler* tasm, Operand operand, ValueT value,
550 X64OperandConverter& i, StubCallMode stub_call_mode,
551 MachineRepresentation rep) {
552 // The FOR_TESTING code doesn't initialize the root register. We can't call
553 // the TSAN builtin since we need to load the external reference through the
554 // root register.
555 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
556 // path. It is not crucial, but it would be nice to remove this restriction.
557 if (codegen->code_kind() != CodeKind::FOR_TESTING) {
558 int size = ElementSizeInBytes(rep);
559 EmitMemoryProbeForTrapHandlerIfNeeded(tasm, i.TempRegister(0), operand,
560 stub_call_mode, size);
561 Register value_reg = GetTSANValueRegister<order>(tasm, value, i, rep);
562 EmitTSANStoreOOL(zone, codegen, tasm, operand, value_reg, i, stub_call_mode,
563 size, order);
564 } else {
565 EmitStore<order>(tasm, operand, value, rep);
566 }
567 }
568
569 class OutOfLineTSANRelaxedLoad final : public OutOfLineCode {
570 public:
571 OutOfLineTSANRelaxedLoad(CodeGenerator* gen, Operand operand,
572 Register scratch0, StubCallMode stub_mode, int size)
573 : OutOfLineCode(gen),
574 operand_(operand),
575 scratch0_(scratch0),
576 #if V8_ENABLE_WEBASSEMBLY
577 stub_mode_(stub_mode),
578 #endif // V8_ENABLE_WEBASSEMBLY
579 size_(size),
580 zone_(gen->zone()) {
581 }
582
583 void Generate() final {
584 const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
585 ? SaveFPRegsMode::kSave
586 : SaveFPRegsMode::kIgnore;
587 __ leaq(scratch0_, operand_);
588
589 #if V8_ENABLE_WEBASSEMBLY
590 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
591 // A direct call to a wasm runtime stub defined in this module.
592 // Just encode the stub index. This will be patched when the code
593 // is added to the native module and copied into wasm code space.
594 __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
595 StubCallMode::kCallWasmRuntimeStub);
596 return;
597 }
598 #endif // V8_ENABLE_WEBASSEMBLY
599
600 __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
601 StubCallMode::kCallBuiltinPointer);
602 }
603
604 private:
605 Operand const operand_;
606 Register const scratch0_;
607 #if V8_ENABLE_WEBASSEMBLY
608 StubCallMode const stub_mode_;
609 #endif // V8_ENABLE_WEBASSEMBLY
610 int size_;
611 Zone* zone_;
612 };
613
614 void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
615 TurboAssembler* tasm, Operand operand,
616 X64OperandConverter& i, StubCallMode mode,
617 int size) {
618 // The FOR_TESTING code doesn't initialize the root register. We can't call
619 // the TSAN builtin since we need to load the external reference through the
620 // root register.
621 // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
622 // path. It is not crucial, but it would be nice to remove this if.
623 if (codegen->code_kind() == CodeKind::FOR_TESTING) return;
624
625 Register scratch0 = i.TempRegister(0);
626 auto tsan_ool = zone->New<OutOfLineTSANRelaxedLoad>(codegen, operand,
627 scratch0, mode, size);
628 tasm->jmp(tsan_ool->entry());
629 tasm->bind(tsan_ool->exit());
630 }
631
632 #else
633 template <std::memory_order order, typename ValueT>
634 void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
635 TurboAssembler* tasm, Operand operand, ValueT value,
636 X64OperandConverter& i, StubCallMode stub_call_mode,
637 MachineRepresentation rep) {
638 DCHECK(order == std::memory_order_relaxed ||
639 order == std::memory_order_seq_cst);
640 EmitStore<order>(tasm, operand, value, rep);
641 }
642
643 void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
644 TurboAssembler* tasm, Operand operand,
645 X64OperandConverter& i, StubCallMode mode,
646 int size) {}
647 #endif // V8_IS_TSAN
648
649 #if V8_ENABLE_WEBASSEMBLY
650 class WasmOutOfLineTrap : public OutOfLineCode {
651 public:
652 WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
653 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
654
655 void Generate() override {
656 X64OperandConverter i(gen_, instr_);
657 TrapId trap_id =
658 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
659 GenerateWithTrapId(trap_id);
660 }
661
662 protected:
663 CodeGenerator* gen_;
664
665 void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
666
667 private:
668 void GenerateCallToTrap(TrapId trap_id) {
669 if (!gen_->wasm_runtime_exception_support()) {
670 // We cannot test calls to the runtime in cctest/test-run-wasm.
671 // Therefore we emit a call to C here instead of a call to the runtime.
672 __ PrepareCallCFunction(0);
673 __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
674 0);
675 __ LeaveFrame(StackFrame::WASM);
676 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
677 size_t pop_size =
678 call_descriptor->ParameterSlotCount() * kSystemPointerSize;
679 // Use rcx as a scratch register; we return immediately anyway.
680 __ Ret(static_cast<int>(pop_size), rcx);
681 } else {
682 gen_->AssembleSourcePosition(instr_);
683 // A direct call to a wasm runtime stub defined in this module.
684 // Just encode the stub index. This will be patched when the code
685 // is added to the native module and copied into wasm code space.
686 __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
687 ReferenceMap* reference_map =
688 gen_->zone()->New<ReferenceMap>(gen_->zone());
689 gen_->RecordSafepoint(reference_map);
690 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
691 }
692 }
693
694 Instruction* instr_;
695 };
696
697 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
698 public:
699 WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
700 : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
701
702 void Generate() final {
703 DCHECK(FLAG_wasm_bounds_checks && !FLAG_wasm_enforce_bounds_checks);
704 gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
705 GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
706 }
707
708 private:
709 int pc_;
710 };
711
712 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
713 InstructionCode opcode, Instruction* instr,
714 int pc) {
715 const MemoryAccessMode access_mode = instr->memory_access_mode();
716 if (access_mode == kMemoryAccessProtected) {
717 zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
718 }
719 }
720
721 #else
722
723 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
724 InstructionCode opcode, Instruction* instr, int pc) {
725 DCHECK_NE(kMemoryAccessProtected, instr->memory_access_mode());
726 }
727
728 #endif // V8_ENABLE_WEBASSEMBLY
729
730 } // namespace
731
732 #define ASSEMBLE_UNOP(asm_instr) \
733 do { \
734 if (instr->Output()->IsRegister()) { \
735 __ asm_instr(i.OutputRegister()); \
736 } else { \
737 __ asm_instr(i.OutputOperand()); \
738 } \
739 } while (false)
740
741 #define ASSEMBLE_BINOP(asm_instr) \
742 do { \
743 if (HasAddressingMode(instr)) { \
744 size_t index = 1; \
745 Operand right = i.MemoryOperand(&index); \
746 __ asm_instr(i.InputRegister(0), right); \
747 } else { \
748 if (HasImmediateInput(instr, 1)) { \
749 if (HasRegisterInput(instr, 0)) { \
750 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
751 } else { \
752 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
753 } \
754 } else { \
755 if (HasRegisterInput(instr, 1)) { \
756 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
757 } else { \
758 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
759 } \
760 } \
761 } \
762 } while (false)
763
764 #define ASSEMBLE_COMPARE(asm_instr) \
765 do { \
766 if (HasAddressingMode(instr)) { \
767 size_t index = 0; \
768 Operand left = i.MemoryOperand(&index); \
769 if (HasImmediateInput(instr, index)) { \
770 __ asm_instr(left, i.InputImmediate(index)); \
771 } else { \
772 __ asm_instr(left, i.InputRegister(index)); \
773 } \
774 } else { \
775 if (HasImmediateInput(instr, 1)) { \
776 if (HasRegisterInput(instr, 0)) { \
777 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
778 } else { \
779 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
780 } \
781 } else { \
782 if (HasRegisterInput(instr, 1)) { \
783 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
784 } else { \
785 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
786 } \
787 } \
788 } \
789 } while (false)
790
791 #define ASSEMBLE_MULT(asm_instr) \
792 do { \
793 if (HasImmediateInput(instr, 1)) { \
794 if (HasRegisterInput(instr, 0)) { \
795 __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
796 i.InputImmediate(1)); \
797 } else { \
798 __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
799 i.InputImmediate(1)); \
800 } \
801 } else { \
802 if (HasRegisterInput(instr, 1)) { \
803 __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
804 } else { \
805 __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
806 } \
807 } \
808 } while (false)
809
810 #define ASSEMBLE_SHIFT(asm_instr, width) \
811 do { \
812 if (HasImmediateInput(instr, 1)) { \
813 if (instr->Output()->IsRegister()) { \
814 __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
815 } else { \
816 __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
817 } \
818 } else { \
819 if (instr->Output()->IsRegister()) { \
820 __ asm_instr##_cl(i.OutputRegister()); \
821 } else { \
822 __ asm_instr##_cl(i.OutputOperand()); \
823 } \
824 } \
825 } while (false)
826
827 #define ASSEMBLE_MOVX(asm_instr) \
828 do { \
829 if (HasAddressingMode(instr)) { \
830 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
831 } else if (HasRegisterInput(instr, 0)) { \
832 __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
833 } else { \
834 __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
835 } \
836 } while (false)
837
838 #define ASSEMBLE_SSE_BINOP(asm_instr) \
839 do { \
840 if (HasAddressingMode(instr)) { \
841 size_t index = 1; \
842 Operand right = i.MemoryOperand(&index); \
843 __ asm_instr(i.InputDoubleRegister(0), right); \
844 } else { \
845 if (instr->InputAt(1)->IsFPRegister()) { \
846 __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
847 } else { \
848 __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
849 } \
850 } \
851 } while (false)
852
853 #define ASSEMBLE_SSE_UNOP(asm_instr) \
854 do { \
855 if (instr->InputAt(0)->IsFPRegister()) { \
856 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
857 } else { \
858 __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
859 } \
860 } while (false)
861
862 #define ASSEMBLE_AVX_BINOP(asm_instr) \
863 do { \
864 CpuFeatureScope avx_scope(tasm(), AVX); \
865 if (HasAddressingMode(instr)) { \
866 size_t index = 1; \
867 Operand right = i.MemoryOperand(&index); \
868 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
869 } else { \
870 if (instr->InputAt(1)->IsFPRegister()) { \
871 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
872 i.InputDoubleRegister(1)); \
873 } else { \
874 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
875 i.InputOperand(1)); \
876 } \
877 } \
878 } while (false)
879
880 #define ASSEMBLE_IEEE754_BINOP(name) \
881 do { \
882 __ PrepareCallCFunction(2); \
883 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
884 } while (false)
885
886 #define ASSEMBLE_IEEE754_UNOP(name) \
887 do { \
888 __ PrepareCallCFunction(1); \
889 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
890 } while (false)
891
892 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
893 do { \
894 Label binop; \
895 __ bind(&binop); \
896 __ mov_inst(rax, i.MemoryOperand(1)); \
897 __ movl(i.TempRegister(0), rax); \
898 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
899 __ lock(); \
900 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
901 __ j(not_equal, &binop); \
902 } while (false)
903
904 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
905 do { \
906 Label binop; \
907 __ bind(&binop); \
908 __ mov_inst(rax, i.MemoryOperand(1)); \
909 __ movq(i.TempRegister(0), rax); \
910 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
911 __ lock(); \
912 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
913 __ j(not_equal, &binop); \
914 } while (false)
915
916 // Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
917 // dst and first src will be the same. For AVX we don't restrict it that way, so
918 // we will omit unnecessary moves.
919 #define ASSEMBLE_SIMD_BINOP(opcode) \
920 do { \
921 if (CpuFeatures::IsSupported(AVX)) { \
922 CpuFeatureScope avx_scope(tasm(), AVX); \
923 __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
924 i.InputSimd128Register(1)); \
925 } else { \
926 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
927 __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1)); \
928 } \
929 } while (false)
930
931 #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
932 do { \
933 if (instr->InputAt(index)->IsSimd128Register()) { \
934 __ opcode(dst_operand, i.InputSimd128Register(index)); \
935 } else { \
936 __ opcode(dst_operand, i.InputOperand(index)); \
937 } \
938 } while (false)
939
940 #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
941 do { \
942 if (instr->InputAt(index)->IsSimd128Register()) { \
943 __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
944 } else { \
945 __ opcode(dst_operand, i.InputOperand(index), imm); \
946 } \
947 } while (false)
948
949 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
950 do { \
951 XMMRegister dst = i.OutputSimd128Register(); \
952 byte input_index = instr->InputCount() == 2 ? 1 : 0; \
953 if (CpuFeatures::IsSupported(AVX)) { \
954 CpuFeatureScope avx_scope(tasm(), AVX); \
955 DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
956 __ v##opcode(dst, i.InputSimd128Register(0), \
957 i.InputSimd128Register(input_index)); \
958 } else { \
959 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
960 ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
961 } \
962 } while (false)
963
964 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
965 do { \
966 XMMRegister dst = i.OutputSimd128Register(); \
967 XMMRegister src = i.InputSimd128Register(0); \
968 if (CpuFeatures::IsSupported(AVX)) { \
969 CpuFeatureScope avx_scope(tasm(), AVX); \
970 DCHECK(instr->InputAt(1)->IsSimd128Register()); \
971 __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
972 } else { \
973 DCHECK_EQ(dst, src); \
974 if (instr->InputAt(1)->IsSimd128Register()) { \
975 __ opcode(dst, i.InputSimd128Register(1), imm); \
976 } else { \
977 __ opcode(dst, i.InputOperand(1), imm); \
978 } \
979 } \
980 } while (false)
981
982 #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
983 do { \
984 Register dst = i.OutputRegister(); \
985 __ xorq(dst, dst); \
986 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); \
987 __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \
988 __ Ptest(kScratchDoubleReg, kScratchDoubleReg); \
989 __ setcc(equal, dst); \
990 } while (false)
991
992 // This macro will directly emit the opcode if the shift is an immediate - the
993 // shift value will be taken modulo 2^width. Otherwise, it will emit code to
994 // perform the modulus operation.
995 #define ASSEMBLE_SIMD_SHIFT(opcode, width) \
996 do { \
997 XMMRegister dst = i.OutputSimd128Register(); \
998 if (HasImmediateInput(instr, 1)) { \
999 if (CpuFeatures::IsSupported(AVX)) { \
1000 CpuFeatureScope avx_scope(tasm(), AVX); \
1001 __ v##opcode(dst, i.InputSimd128Register(0), \
1002 byte{i.InputInt##width(1)}); \
1003 } else { \
1004 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
1005 __ opcode(dst, byte{i.InputInt##width(1)}); \
1006 } \
1007 } else { \
1008 constexpr int mask = (1 << width) - 1; \
1009 __ movq(kScratchRegister, i.InputRegister(1)); \
1010 __ andq(kScratchRegister, Immediate(mask)); \
1011 __ Movq(kScratchDoubleReg, kScratchRegister); \
1012 if (CpuFeatures::IsSupported(AVX)) { \
1013 CpuFeatureScope avx_scope(tasm(), AVX); \
1014 __ v##opcode(dst, i.InputSimd128Register(0), kScratchDoubleReg); \
1015 } else { \
1016 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
1017 __ opcode(dst, kScratchDoubleReg); \
1018 } \
1019 } \
1020 } while (false)
1021
1022 #define ASSEMBLE_PINSR(ASM_INSTR) \
1023 do { \
1024 XMMRegister dst = i.OutputSimd128Register(); \
1025 XMMRegister src = i.InputSimd128Register(0); \
1026 uint8_t laneidx = i.InputUint8(1); \
1027 uint32_t load_offset; \
1028 if (HasAddressingMode(instr)) { \
1029 __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx, &load_offset); \
1030 } else if (instr->InputAt(2)->IsFPRegister()) { \
1031 __ Movq(kScratchRegister, i.InputDoubleRegister(2)); \
1032 __ ASM_INSTR(dst, src, kScratchRegister, laneidx, &load_offset); \
1033 } else if (instr->InputAt(2)->IsRegister()) { \
1034 __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx, &load_offset); \
1035 } else { \
1036 __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx, &load_offset); \
1037 } \
1038 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, load_offset); \
1039 } while (false)
1040
1041 #define ASSEMBLE_SEQ_CST_STORE(rep) \
1042 do { \
1043 Register value = i.InputRegister(0); \
1044 Operand operand = i.MemoryOperand(1); \
1045 EmitTSANAwareStore<std::memory_order_seq_cst>( \
1046 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(), \
1047 rep); \
1048 } while (false)
1049
1050 void CodeGenerator::AssembleDeconstructFrame() {
1051 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
1052 __ movq(rsp, rbp);
1053 __ popq(rbp);
1054 }
1055
1056 void CodeGenerator::AssemblePrepareTailCall() {
1057 if (frame_access_state()->has_frame()) {
1058 __ movq(rbp, MemOperand(rbp, 0));
1059 }
1060 frame_access_state()->SetFrameAccessToSP();
1061 }
1062
1063 namespace {
1064
1065 void AdjustStackPointerForTailCall(Instruction* instr,
1066 TurboAssembler* assembler, Linkage* linkage,
1067 OptimizedCompilationInfo* info,
1068 FrameAccessState* state,
1069 int new_slot_above_sp,
1070 bool allow_shrinkage = true) {
1071 int stack_slot_delta;
1072 if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
1073 // For this special tail-call mode, the callee has the same arguments and
1074 // linkage as the caller, and arguments adapter frames must be preserved.
1075 // Thus we simply reset the stack pointer register to its original
1076 // value before frame construction.
1077 // See also: AssembleConstructFrame.
1078 DCHECK(!info->is_osr());
1079 DCHECK(linkage->GetIncomingDescriptor()->CalleeSavedRegisters().is_empty());
1080 DCHECK(
1081 linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters().is_empty());
1082 DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
1083 stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
1084 kReturnAddressStackSlotCount) *
1085 -1;
1086 DCHECK_LE(stack_slot_delta, 0);
1087 } else {
1088 int current_sp_offset = state->GetSPToFPSlotCount() +
1089 StandardFrameConstants::kFixedSlotCountAboveFp;
1090 stack_slot_delta = new_slot_above_sp - current_sp_offset;
1091 }
1092
1093 if (stack_slot_delta > 0) {
1094 assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
1095 state->IncreaseSPDelta(stack_slot_delta);
1096 } else if (allow_shrinkage && stack_slot_delta < 0) {
1097 assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
1098 state->IncreaseSPDelta(stack_slot_delta);
1099 }
1100 }
1101
1102 void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
1103 XMMRegister reg) {
1104 assembler->Move(reg, make_uint64(imms[3], imms[2]),
1105 make_uint64(imms[1], imms[0]));
1106 }
1107
1108 } // namespace
1109
1110 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
1111 int first_unused_slot_offset) {
1112 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
1113 ZoneVector<MoveOperands*> pushes(zone());
1114 GetPushCompatibleMoves(instr, flags, &pushes);
1115
1116 if (!pushes.empty() &&
1117 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
1118 first_unused_slot_offset)) {
1119 DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
1120 X64OperandConverter g(this, instr);
1121 for (auto move : pushes) {
1122 LocationOperand destination_location(
1123 LocationOperand::cast(move->destination()));
1124 InstructionOperand source(move->source());
1125 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1126 frame_access_state(),
1127 destination_location.index());
1128 if (source.IsStackSlot()) {
1129 LocationOperand source_location(LocationOperand::cast(source));
1130 __ Push(g.SlotToOperand(source_location.index()));
1131 } else if (source.IsRegister()) {
1132 LocationOperand source_location(LocationOperand::cast(source));
1133 __ Push(source_location.GetRegister());
1134 } else if (source.IsImmediate()) {
1135 __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
1136 } else {
1137 // Pushes of non-scalar data types are not supported.
1138 UNIMPLEMENTED();
1139 }
1140 frame_access_state()->IncreaseSPDelta(1);
1141 move->Eliminate();
1142 }
1143 }
1144 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1145 frame_access_state(), first_unused_slot_offset,
1146 false);
1147 }
1148
1149 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
1150 int first_unused_slot_offset) {
1151 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1152 frame_access_state(), first_unused_slot_offset);
1153 }
1154
1155 // Check that {kJavaScriptCallCodeStartRegister} is correct.
1156 void CodeGenerator::AssembleCodeStartRegisterCheck() {
1157 __ ComputeCodeStartAddress(rbx);
1158 __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
1159 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
1160 }
1161
1162 void CodeGenerator::BailoutIfDeoptimized() { __ BailoutIfDeoptimized(rbx); }
1163
1164 bool ShouldClearOutputRegisterBeforeInstruction(CodeGenerator* g,
1165 Instruction* instr) {
1166 X64OperandConverter i(g, instr);
1167 FlagsMode mode = FlagsModeField::decode(instr->opcode());
1168 if (mode == kFlags_set) {
1169 FlagsCondition condition = FlagsConditionField::decode(instr->opcode());
1170 if (condition != kUnorderedEqual && condition != kUnorderedNotEqual) {
1171 Register reg = i.OutputRegister(instr->OutputCount() - 1);
1172 // Do not clear the output register when it is also an input register.
1173 for (size_t index = 0; index < instr->InputCount(); ++index) {
1174 if (HasRegisterInput(instr, index) && reg == i.InputRegister(index))
1175 return false;
1176 }
1177 return true;
1178 }
1179 }
1180 return false;
1181 }
1182
1183 // Assembles an instruction after register allocation, producing machine code.
1184 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
1185 Instruction* instr) {
1186 X64OperandConverter i(this, instr);
1187 InstructionCode opcode = instr->opcode();
1188 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
1189 if (ShouldClearOutputRegisterBeforeInstruction(this, instr)) {
1190 // Transform setcc + movzxbl into xorl + setcc to avoid a register stall
1191 // and get a one-byte-shorter encoding.
1192 Register reg = i.OutputRegister(instr->OutputCount() - 1);
1193 __ xorl(reg, reg);
1194 }
1195 switch (arch_opcode) {
1196 case kArchCallCodeObject: {
1197 if (HasImmediateInput(instr, 0)) {
1198 Handle<CodeT> code = i.InputCode(0);
1199 __ Call(code, RelocInfo::CODE_TARGET);
1200 } else {
1201 Register reg = i.InputRegister(0);
1202 DCHECK_IMPLIES(
1203 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1204 reg == kJavaScriptCallCodeStartRegister);
1205 __ LoadCodeObjectEntry(reg, reg);
1206 __ call(reg);
1207 }
1208 RecordCallPosition(instr);
1209 frame_access_state()->ClearSPDelta();
1210 break;
1211 }
1212 case kArchCallBuiltinPointer: {
1213 DCHECK(!HasImmediateInput(instr, 0));
1214 Register builtin_index = i.InputRegister(0);
1215 __ CallBuiltinByIndex(builtin_index);
1216 RecordCallPosition(instr);
1217 frame_access_state()->ClearSPDelta();
1218 break;
1219 }
1220 #if V8_ENABLE_WEBASSEMBLY
1221 case kArchCallWasmFunction: {
1222 if (HasImmediateInput(instr, 0)) {
1223 Constant constant = i.ToConstant(instr->InputAt(0));
1224 Address wasm_code = static_cast<Address>(constant.ToInt64());
1225 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1226 __ near_call(wasm_code, constant.rmode());
1227 } else {
1228 __ Call(wasm_code, constant.rmode());
1229 }
1230 } else {
1231 __ call(i.InputRegister(0));
1232 }
1233 RecordCallPosition(instr);
1234 frame_access_state()->ClearSPDelta();
1235 break;
1236 }
1237 case kArchTailCallWasm: {
1238 if (HasImmediateInput(instr, 0)) {
1239 Constant constant = i.ToConstant(instr->InputAt(0));
1240 Address wasm_code = static_cast<Address>(constant.ToInt64());
1241 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1242 __ near_jmp(wasm_code, constant.rmode());
1243 } else {
1244 __ Move(kScratchRegister, wasm_code, constant.rmode());
1245 __ jmp(kScratchRegister);
1246 }
1247 } else {
1248 __ jmp(i.InputRegister(0));
1249 }
1250 unwinding_info_writer_.MarkBlockWillExit();
1251 frame_access_state()->ClearSPDelta();
1252 frame_access_state()->SetFrameAccessToDefault();
1253 break;
1254 }
1255 #endif // V8_ENABLE_WEBASSEMBLY
1256 case kArchTailCallCodeObject: {
1257 if (HasImmediateInput(instr, 0)) {
1258 Handle<CodeT> code = i.InputCode(0);
1259 __ Jump(code, RelocInfo::CODE_TARGET);
1260 } else {
1261 Register reg = i.InputRegister(0);
1262 DCHECK_IMPLIES(
1263 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1264 reg == kJavaScriptCallCodeStartRegister);
1265 __ LoadCodeObjectEntry(reg, reg);
1266 __ jmp(reg);
1267 }
1268 unwinding_info_writer_.MarkBlockWillExit();
1269 frame_access_state()->ClearSPDelta();
1270 frame_access_state()->SetFrameAccessToDefault();
1271 break;
1272 }
1273 case kArchTailCallAddress: {
1274 CHECK(!HasImmediateInput(instr, 0));
1275 Register reg = i.InputRegister(0);
1276 DCHECK_IMPLIES(
1277 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1278 reg == kJavaScriptCallCodeStartRegister);
1279 __ jmp(reg);
1280 unwinding_info_writer_.MarkBlockWillExit();
1281 frame_access_state()->ClearSPDelta();
1282 frame_access_state()->SetFrameAccessToDefault();
1283 break;
1284 }
1285 case kArchCallJSFunction: {
1286 Register func = i.InputRegister(0);
1287 if (FLAG_debug_code) {
1288 // Check the function's context matches the context argument.
1289 __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
1290 __ Assert(equal, AbortReason::kWrongFunctionContext);
1291 }
1292 static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
1293 __ LoadTaggedPointerField(rcx,
1294 FieldOperand(func, JSFunction::kCodeOffset));
1295 __ CallCodeTObject(rcx);
1296 frame_access_state()->ClearSPDelta();
1297 RecordCallPosition(instr);
1298 break;
1299 }
1300 case kArchPrepareCallCFunction: {
1301 // Frame alignment requires using FP-relative frame addressing.
1302 frame_access_state()->SetFrameAccessToFP();
1303 int const num_parameters = MiscField::decode(instr->opcode());
1304 __ PrepareCallCFunction(num_parameters);
1305 break;
1306 }
1307 case kArchSaveCallerRegisters: {
1308 fp_mode_ =
1309 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1310 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1311 fp_mode_ == SaveFPRegsMode::kSave);
1312 // kReturnRegister0 should have been saved before entering the stub.
1313 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1314 DCHECK(IsAligned(bytes, kSystemPointerSize));
1315 DCHECK_EQ(0, frame_access_state()->sp_delta());
1316 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1317 DCHECK(!caller_registers_saved_);
1318 caller_registers_saved_ = true;
1319 break;
1320 }
1321 case kArchRestoreCallerRegisters: {
1322 DCHECK(fp_mode_ ==
1323 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1324 DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1325 fp_mode_ == SaveFPRegsMode::kSave);
1326 // Don't overwrite the returned value.
1327 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1328 frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
1329 DCHECK_EQ(0, frame_access_state()->sp_delta());
1330 DCHECK(caller_registers_saved_);
1331 caller_registers_saved_ = false;
1332 break;
1333 }
1334 case kArchPrepareTailCall:
1335 AssemblePrepareTailCall();
1336 break;
1337 case kArchCallCFunction: {
1338 int const num_gp_parameters = ParamField::decode(instr->opcode());
1339 int const num_fp_parameters = FPParamField::decode(instr->opcode());
1340 Label return_location;
1341 #if V8_ENABLE_WEBASSEMBLY
1342 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1343 // Put the return address in a stack slot.
1344 __ leaq(kScratchRegister, Operand(&return_location, 0));
1345 __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1346 kScratchRegister);
1347 }
1348 #endif // V8_ENABLE_WEBASSEMBLY
1349 if (HasImmediateInput(instr, 0)) {
1350 ExternalReference ref = i.InputExternalReference(0);
1351 __ CallCFunction(ref, num_gp_parameters + num_fp_parameters);
1352 } else {
1353 Register func = i.InputRegister(0);
1354 __ CallCFunction(func, num_gp_parameters + num_fp_parameters);
1355 }
1356 __ bind(&return_location);
1357 #if V8_ENABLE_WEBASSEMBLY
1358 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1359 RecordSafepoint(instr->reference_map());
1360 }
1361 #endif // V8_ENABLE_WEBASSEMBLY
1362 frame_access_state()->SetFrameAccessToDefault();
1363 // Ideally, we should decrement SP delta to match the change of stack
1364 // pointer in CallCFunction. However, on certain architectures (e.g.
1365 // ARM) there may be a stricter alignment requirement, causing the old SP
1366 // to be saved on the stack. In those cases, we cannot calculate the SP
1367 // delta statically.
1368 frame_access_state()->ClearSPDelta();
1369 if (caller_registers_saved_) {
1370 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1371 // Here, we assume the sequence to be:
1372 // kArchSaveCallerRegisters;
1373 // kArchCallCFunction;
1374 // kArchRestoreCallerRegisters;
1375 int bytes =
1376 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1377 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1378 }
1379 // TODO(turbofan): Do we need an lfence here?
1380 break;
1381 }
1382 case kArchJmp:
1383 AssembleArchJump(i.InputRpo(0));
1384 break;
1385 case kArchBinarySearchSwitch:
1386 AssembleArchBinarySearchSwitch(instr);
1387 break;
1388 case kArchTableSwitch:
1389 AssembleArchTableSwitch(instr);
1390 break;
1391 case kArchComment:
1392 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1393 break;
1394 case kArchAbortCSADcheck:
1395 DCHECK(i.InputRegister(0) == rdx);
1396 {
1397 // We don't actually want to generate a pile of code for this, so just
1398 // claim there is a stack frame, without generating one.
1399 FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
1400 __ Call(BUILTIN_CODE(isolate(), AbortCSADcheck),
1401 RelocInfo::CODE_TARGET);
1402 }
1403 __ int3();
1404 unwinding_info_writer_.MarkBlockWillExit();
1405 break;
1406 case kArchDebugBreak:
1407 __ DebugBreak();
1408 break;
1409 case kArchThrowTerminator:
1410 unwinding_info_writer_.MarkBlockWillExit();
1411 break;
1412 case kArchNop:
1413 // don't emit code for nops.
1414 break;
1415 case kArchDeoptimize: {
1416 DeoptimizationExit* exit =
1417 BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
1418 __ jmp(exit->label());
1419 break;
1420 }
1421 case kArchRet:
1422 AssembleReturn(instr->InputAt(0));
1423 break;
1424 case kArchFramePointer:
1425 __ movq(i.OutputRegister(), rbp);
1426 break;
1427 case kArchParentFramePointer:
1428 if (frame_access_state()->has_frame()) {
1429 __ movq(i.OutputRegister(), Operand(rbp, 0));
1430 } else {
1431 __ movq(i.OutputRegister(), rbp);
1432 }
1433 break;
1434 case kArchStackPointerGreaterThan: {
1435 // Potentially apply an offset to the current stack pointer before the
1436 // comparison to consider the size difference of an optimized frame versus
1437 // the contained unoptimized frames.
1438
1439 Register lhs_register = rsp;
1440 uint32_t offset;
1441
1442 if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1443 lhs_register = kScratchRegister;
1444 __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1445 }
1446
1447 constexpr size_t kValueIndex = 0;
1448 if (HasAddressingMode(instr)) {
1449 __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1450 } else {
1451 __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1452 }
1453 break;
1454 }
1455 case kArchStackCheckOffset:
1456 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1457 break;
1458 case kArchTruncateDoubleToI: {
1459 auto result = i.OutputRegister();
1460 auto input = i.InputDoubleRegister(0);
1461 auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1462 this, result, input, DetermineStubCallMode(),
1463 &unwinding_info_writer_);
1464 // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1465 // use of Cvttsd2siq requires the movl below to avoid sign extension.
1466 __ Cvttsd2siq(result, input);
1467 __ cmpq(result, Immediate(1));
1468 __ j(overflow, ool->entry());
1469 __ bind(ool->exit());
1470 __ movl(result, result);
1471 break;
1472 }
1473 case kArchStoreWithWriteBarrier: // Fall through.
1474 case kArchAtomicStoreWithWriteBarrier: {
1475 RecordWriteMode mode =
1476 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1477 Register object = i.InputRegister(0);
1478 size_t index = 0;
1479 Operand operand = i.MemoryOperand(&index);
1480 Register value = i.InputRegister(index);
1481 Register scratch0 = i.TempRegister(0);
1482 Register scratch1 = i.TempRegister(1);
1483
1484 if (FLAG_debug_code) {
1485 // Checking that |value| is not a cleared weakref: our write barrier
1486 // does not support that for now.
1487 __ Cmp(value, kClearedWeakHeapObjectLower32);
1488 __ Check(not_equal, AbortReason::kOperandIsCleared);
1489 }
1490
1491 auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1492 scratch0, scratch1, mode,
1493 DetermineStubCallMode());
1494 if (arch_opcode == kArchStoreWithWriteBarrier) {
1495 EmitTSANAwareStore<std::memory_order_relaxed>(
1496 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1497 MachineRepresentation::kTagged);
1498 } else {
1499 DCHECK_EQ(arch_opcode, kArchAtomicStoreWithWriteBarrier);
1500 EmitTSANAwareStore<std::memory_order_seq_cst>(
1501 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1502 MachineRepresentation::kTagged);
1503 }
1504 if (mode > RecordWriteMode::kValueIsPointer) {
1505 __ JumpIfSmi(value, ool->exit());
1506 }
1507 __ CheckPageFlag(object, scratch0,
1508 MemoryChunk::kPointersFromHereAreInterestingMask,
1509 not_zero, ool->entry());
1510 __ bind(ool->exit());
1511 break;
1512 }
1513 case kX64MFence:
1514 __ mfence();
1515 break;
1516 case kX64LFence:
1517 __ lfence();
1518 break;
1519 case kArchStackSlot: {
1520 FrameOffset offset =
1521 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1522 Register base = offset.from_stack_pointer() ? rsp : rbp;
1523 __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1524 break;
1525 }
1526 case kIeee754Float64Acos:
1527 ASSEMBLE_IEEE754_UNOP(acos);
1528 break;
1529 case kIeee754Float64Acosh:
1530 ASSEMBLE_IEEE754_UNOP(acosh);
1531 break;
1532 case kIeee754Float64Asin:
1533 ASSEMBLE_IEEE754_UNOP(asin);
1534 break;
1535 case kIeee754Float64Asinh:
1536 ASSEMBLE_IEEE754_UNOP(asinh);
1537 break;
1538 case kIeee754Float64Atan:
1539 ASSEMBLE_IEEE754_UNOP(atan);
1540 break;
1541 case kIeee754Float64Atanh:
1542 ASSEMBLE_IEEE754_UNOP(atanh);
1543 break;
1544 case kIeee754Float64Atan2:
1545 ASSEMBLE_IEEE754_BINOP(atan2);
1546 break;
1547 case kIeee754Float64Cbrt:
1548 ASSEMBLE_IEEE754_UNOP(cbrt);
1549 break;
1550 case kIeee754Float64Cos:
1551 ASSEMBLE_IEEE754_UNOP(cos);
1552 break;
1553 case kIeee754Float64Cosh:
1554 ASSEMBLE_IEEE754_UNOP(cosh);
1555 break;
1556 case kIeee754Float64Exp:
1557 ASSEMBLE_IEEE754_UNOP(exp);
1558 break;
1559 case kIeee754Float64Expm1:
1560 ASSEMBLE_IEEE754_UNOP(expm1);
1561 break;
1562 case kIeee754Float64Log:
1563 ASSEMBLE_IEEE754_UNOP(log);
1564 break;
1565 case kIeee754Float64Log1p:
1566 ASSEMBLE_IEEE754_UNOP(log1p);
1567 break;
1568 case kIeee754Float64Log2:
1569 ASSEMBLE_IEEE754_UNOP(log2);
1570 break;
1571 case kIeee754Float64Log10:
1572 ASSEMBLE_IEEE754_UNOP(log10);
1573 break;
1574 case kIeee754Float64Pow:
1575 ASSEMBLE_IEEE754_BINOP(pow);
1576 break;
1577 case kIeee754Float64Sin:
1578 ASSEMBLE_IEEE754_UNOP(sin);
1579 break;
1580 case kIeee754Float64Sinh:
1581 ASSEMBLE_IEEE754_UNOP(sinh);
1582 break;
1583 case kIeee754Float64Tan:
1584 ASSEMBLE_IEEE754_UNOP(tan);
1585 break;
1586 case kIeee754Float64Tanh:
1587 ASSEMBLE_IEEE754_UNOP(tanh);
1588 break;
1589 case kX64Add32:
1590 ASSEMBLE_BINOP(addl);
1591 break;
1592 case kX64Add:
1593 ASSEMBLE_BINOP(addq);
1594 break;
1595 case kX64Sub32:
1596 ASSEMBLE_BINOP(subl);
1597 break;
1598 case kX64Sub:
1599 ASSEMBLE_BINOP(subq);
1600 break;
1601 case kX64And32:
1602 ASSEMBLE_BINOP(andl);
1603 break;
1604 case kX64And:
1605 ASSEMBLE_BINOP(andq);
1606 break;
1607 case kX64Cmp8:
1608 ASSEMBLE_COMPARE(cmpb);
1609 break;
1610 case kX64Cmp16:
1611 ASSEMBLE_COMPARE(cmpw);
1612 break;
1613 case kX64Cmp32:
1614 ASSEMBLE_COMPARE(cmpl);
1615 break;
1616 case kX64Cmp:
1617 ASSEMBLE_COMPARE(cmpq);
1618 break;
1619 case kX64Test8:
1620 ASSEMBLE_COMPARE(testb);
1621 break;
1622 case kX64Test16:
1623 ASSEMBLE_COMPARE(testw);
1624 break;
1625 case kX64Test32:
1626 ASSEMBLE_COMPARE(testl);
1627 break;
1628 case kX64Test:
1629 ASSEMBLE_COMPARE(testq);
1630 break;
1631 case kX64Imul32:
1632 ASSEMBLE_MULT(imull);
1633 break;
1634 case kX64Imul:
1635 ASSEMBLE_MULT(imulq);
1636 break;
1637 case kX64ImulHigh32:
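           // The one-operand imull multiplies eax by the given operand and leaves
           // the high 32 bits of the product in edx.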
1638 if (HasRegisterInput(instr, 1)) {
1639 __ imull(i.InputRegister(1));
1640 } else {
1641 __ imull(i.InputOperand(1));
1642 }
1643 break;
1644 case kX64UmulHigh32:
1645 if (HasRegisterInput(instr, 1)) {
1646 __ mull(i.InputRegister(1));
1647 } else {
1648 __ mull(i.InputOperand(1));
1649 }
1650 break;
1651 case kX64Idiv32:
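           // cdq sign-extends eax into edx; idivl then leaves the quotient in eax
           // and the remainder in edx.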
1652 __ cdq();
1653 __ idivl(i.InputRegister(1));
1654 break;
1655 case kX64Idiv:
1656 __ cqo();
1657 __ idivq(i.InputRegister(1));
1658 break;
1659 case kX64Udiv32:
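           // divl uses edx:eax as the dividend, so clear edx first; the quotient
           // ends up in eax and the remainder in edx.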
1660 __ xorl(rdx, rdx);
1661 __ divl(i.InputRegister(1));
1662 break;
1663 case kX64Udiv:
1664 __ xorq(rdx, rdx);
1665 __ divq(i.InputRegister(1));
1666 break;
1667 case kX64Not:
1668 ASSEMBLE_UNOP(notq);
1669 break;
1670 case kX64Not32:
1671 ASSEMBLE_UNOP(notl);
1672 break;
1673 case kX64Neg:
1674 ASSEMBLE_UNOP(negq);
1675 break;
1676 case kX64Neg32:
1677 ASSEMBLE_UNOP(negl);
1678 break;
1679 case kX64Or32:
1680 ASSEMBLE_BINOP(orl);
1681 break;
1682 case kX64Or:
1683 ASSEMBLE_BINOP(orq);
1684 break;
1685 case kX64Xor32:
1686 ASSEMBLE_BINOP(xorl);
1687 break;
1688 case kX64Xor:
1689 ASSEMBLE_BINOP(xorq);
1690 break;
1691 case kX64Shl32:
1692 ASSEMBLE_SHIFT(shll, 5);
1693 break;
1694 case kX64Shl:
1695 ASSEMBLE_SHIFT(shlq, 6);
1696 break;
1697 case kX64Shr32:
1698 ASSEMBLE_SHIFT(shrl, 5);
1699 break;
1700 case kX64Shr:
1701 ASSEMBLE_SHIFT(shrq, 6);
1702 break;
1703 case kX64Sar32:
1704 ASSEMBLE_SHIFT(sarl, 5);
1705 break;
1706 case kX64Sar:
1707 ASSEMBLE_SHIFT(sarq, 6);
1708 break;
1709 case kX64Rol32:
1710 ASSEMBLE_SHIFT(roll, 5);
1711 break;
1712 case kX64Rol:
1713 ASSEMBLE_SHIFT(rolq, 6);
1714 break;
1715 case kX64Ror32:
1716 ASSEMBLE_SHIFT(rorl, 5);
1717 break;
1718 case kX64Ror:
1719 ASSEMBLE_SHIFT(rorq, 6);
1720 break;
1721 case kX64Lzcnt:
1722 if (HasRegisterInput(instr, 0)) {
1723 __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1724 } else {
1725 __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1726 }
1727 break;
1728 case kX64Lzcnt32:
1729 if (HasRegisterInput(instr, 0)) {
1730 __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1731 } else {
1732 __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1733 }
1734 break;
1735 case kX64Tzcnt:
1736 if (HasRegisterInput(instr, 0)) {
1737 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1738 } else {
1739 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1740 }
1741 break;
1742 case kX64Tzcnt32:
1743 if (HasRegisterInput(instr, 0)) {
1744 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1745 } else {
1746 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1747 }
1748 break;
1749 case kX64Popcnt:
1750 if (HasRegisterInput(instr, 0)) {
1751 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1752 } else {
1753 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1754 }
1755 break;
1756 case kX64Popcnt32:
1757 if (HasRegisterInput(instr, 0)) {
1758 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1759 } else {
1760 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1761 }
1762 break;
1763 case kX64Bswap:
1764 __ bswapq(i.OutputRegister());
1765 break;
1766 case kX64Bswap32:
1767 __ bswapl(i.OutputRegister());
1768 break;
1769 case kSSEFloat32Cmp:
1770 ASSEMBLE_SSE_BINOP(Ucomiss);
1771 break;
1772 case kSSEFloat32Add:
1773 ASSEMBLE_SSE_BINOP(addss);
1774 break;
1775 case kSSEFloat32Sub:
1776 ASSEMBLE_SSE_BINOP(subss);
1777 break;
1778 case kSSEFloat32Mul:
1779 ASSEMBLE_SSE_BINOP(mulss);
1780 break;
1781 case kSSEFloat32Div:
1782 ASSEMBLE_SSE_BINOP(divss);
1783 // Don't delete this mov. It may improve performance on some CPUs,
1784 // when there is a (v)mulss depending on the result.
1785 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1786 break;
1787 case kSSEFloat32Sqrt:
1788 ASSEMBLE_SSE_UNOP(sqrtss);
1789 break;
1790 case kSSEFloat32ToFloat64:
1791 ASSEMBLE_SSE_UNOP(Cvtss2sd);
1792 break;
1793 case kSSEFloat32Round: {
1794 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1795 RoundingMode const mode =
1796 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1797 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1798 break;
1799 }
1800 case kSSEFloat32ToInt32:
1801 if (instr->InputAt(0)->IsFPRegister()) {
1802 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1803 } else {
1804 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1805 }
1806 break;
1807 case kSSEFloat32ToUint32: {
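           // Use the 64-bit conversion so that uint32 inputs above INT32_MAX
           // (which still fit in the int64 range) convert correctly.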
1808 if (instr->InputAt(0)->IsFPRegister()) {
1809 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1810 } else {
1811 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1812 }
1813 break;
1814 }
1815 case kSSEFloat64Cmp:
1816 ASSEMBLE_SSE_BINOP(Ucomisd);
1817 break;
1818 case kSSEFloat64Add:
1819 ASSEMBLE_SSE_BINOP(addsd);
1820 break;
1821 case kSSEFloat64Sub:
1822 ASSEMBLE_SSE_BINOP(subsd);
1823 break;
1824 case kSSEFloat64Mul:
1825 ASSEMBLE_SSE_BINOP(mulsd);
1826 break;
1827 case kSSEFloat64Div:
1828 ASSEMBLE_SSE_BINOP(divsd);
1829 // Don't delete this mov. It may improve performance on some CPUs,
1830 // when there is a (v)mulsd depending on the result.
1831 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1832 break;
1833 case kSSEFloat64Mod: {
1834 __ AllocateStackSpace(kDoubleSize);
1835 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1836 kDoubleSize);
1837 // Move values to st(0) and st(1).
1838 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1839 __ fld_d(Operand(rsp, 0));
1840 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1841 __ fld_d(Operand(rsp, 0));
1842 // Loop while fprem isn't done.
1843 Label mod_loop;
1844 __ bind(&mod_loop);
1845       // This instruction traps on all kinds of inputs, but we assume the
1846       // floating-point control word is set to ignore them all.
1847 __ fprem();
1848       // The following 2 instructions implicitly use rax.
1849 __ fnstsw_ax();
1850 if (CpuFeatures::IsSupported(SAHF)) {
1851 CpuFeatureScope sahf_scope(tasm(), SAHF);
1852 __ sahf();
1853 } else {
1854 __ shrl(rax, Immediate(8));
1855 __ andl(rax, Immediate(0xFF));
1856 __ pushq(rax);
1857 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1858 kSystemPointerSize);
1859 __ popfq();
1860 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1861 -kSystemPointerSize);
1862 }
1863 __ j(parity_even, &mod_loop);
1864 // Move output to stack and clean up.
1865 __ fstp(1);
1866 __ fstp_d(Operand(rsp, 0));
1867 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1868 __ addq(rsp, Immediate(kDoubleSize));
1869 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1870 -kDoubleSize);
1871 break;
1872 }
1873 case kSSEFloat32Max: {
1874 Label compare_swap, done_compare;
1875 if (instr->InputAt(1)->IsFPRegister()) {
1876 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1877 } else {
1878 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1879 }
1880 auto ool =
1881 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
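           // Ucomiss sets the parity flag when either operand is NaN; in that case
           // the out-of-line code loads a NaN result.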
1882 __ j(parity_even, ool->entry());
1883 __ j(above, &done_compare, Label::kNear);
1884 __ j(below, &compare_swap, Label::kNear);
1885 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1886 __ testl(kScratchRegister, Immediate(1));
1887 __ j(zero, &done_compare, Label::kNear);
1888 __ bind(&compare_swap);
1889 if (instr->InputAt(1)->IsFPRegister()) {
1890 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1891 } else {
1892 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1893 }
1894 __ bind(&done_compare);
1895 __ bind(ool->exit());
1896 break;
1897 }
1898 case kSSEFloat32Min: {
1899 Label compare_swap, done_compare;
1900 if (instr->InputAt(1)->IsFPRegister()) {
1901 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1902 } else {
1903 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1904 }
1905 auto ool =
1906 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1907 __ j(parity_even, ool->entry());
1908 __ j(below, &done_compare, Label::kNear);
1909 __ j(above, &compare_swap, Label::kNear);
1910 if (instr->InputAt(1)->IsFPRegister()) {
1911 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1912 } else {
1913 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1914 __ Movmskps(kScratchRegister, kScratchDoubleReg);
1915 }
1916 __ testl(kScratchRegister, Immediate(1));
1917 __ j(zero, &done_compare, Label::kNear);
1918 __ bind(&compare_swap);
1919 if (instr->InputAt(1)->IsFPRegister()) {
1920 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1921 } else {
1922 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1923 }
1924 __ bind(&done_compare);
1925 __ bind(ool->exit());
1926 break;
1927 }
1928 case kSSEFloat64Max: {
1929 Label compare_swap, done_compare;
1930 if (instr->InputAt(1)->IsFPRegister()) {
1931 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1932 } else {
1933 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1934 }
1935 auto ool =
1936 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1937 __ j(parity_even, ool->entry());
1938 __ j(above, &done_compare, Label::kNear);
1939 __ j(below, &compare_swap, Label::kNear);
1940 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1941 __ testl(kScratchRegister, Immediate(1));
1942 __ j(zero, &done_compare, Label::kNear);
1943 __ bind(&compare_swap);
1944 if (instr->InputAt(1)->IsFPRegister()) {
1945 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1946 } else {
1947 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1948 }
1949 __ bind(&done_compare);
1950 __ bind(ool->exit());
1951 break;
1952 }
1953 case kSSEFloat64Min: {
1954 Label compare_swap, done_compare;
1955 if (instr->InputAt(1)->IsFPRegister()) {
1956 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1957 } else {
1958 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1959 }
1960 auto ool =
1961 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1962 __ j(parity_even, ool->entry());
1963 __ j(below, &done_compare, Label::kNear);
1964 __ j(above, &compare_swap, Label::kNear);
1965 if (instr->InputAt(1)->IsFPRegister()) {
1966 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1967 } else {
1968 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1969 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1970 }
1971 __ testl(kScratchRegister, Immediate(1));
1972 __ j(zero, &done_compare, Label::kNear);
1973 __ bind(&compare_swap);
1974 if (instr->InputAt(1)->IsFPRegister()) {
1975 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1976 } else {
1977 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1978 }
1979 __ bind(&done_compare);
1980 __ bind(ool->exit());
1981 break;
1982 }
1983 case kSSEFloat64Sqrt:
1984 ASSEMBLE_SSE_UNOP(Sqrtsd);
1985 break;
1986 case kSSEFloat64Round: {
1987 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1988 RoundingMode const mode =
1989 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1990 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1991 break;
1992 }
1993 case kSSEFloat64ToFloat32:
1994 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1995 break;
1996 case kSSEFloat64ToInt32:
1997 if (instr->InputAt(0)->IsFPRegister()) {
1998 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1999 } else {
2000 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
2001 }
2002 break;
2003 case kSSEFloat64ToUint32: {
2004 if (instr->InputAt(0)->IsFPRegister()) {
2005 __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2006 } else {
2007 __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
2008 }
2009 if (MiscField::decode(instr->opcode())) {
2010 __ AssertZeroExtended(i.OutputRegister());
2011 }
2012 break;
2013 }
2014 case kSSEFloat32ToInt64: {
2015 Register output_reg = i.OutputRegister(0);
2016 if (instr->OutputCount() == 1) {
2017 if (instr->InputAt(0)->IsFPRegister()) {
2018 __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2019 } else {
2020 __ Cvttss2siq(output_reg, i.InputOperand(0));
2021 }
2022 break;
2023 }
2024 DCHECK_EQ(2, instr->OutputCount());
2025 Register success_reg = i.OutputRegister(1);
2026 if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2027 DoubleRegister rounded = kScratchDoubleReg;
2028 if (instr->InputAt(0)->IsFPRegister()) {
2029 __ Roundss(rounded, i.InputDoubleRegister(0), kRoundToZero);
2030 __ Cvttss2siq(output_reg, i.InputDoubleRegister(0));
2031 } else {
2032 __ Roundss(rounded, i.InputOperand(0), kRoundToZero);
2033 // Convert {rounded} instead of the input operand, to avoid another
2034 // load.
2035 __ Cvttss2siq(output_reg, rounded);
2036 }
2037 DoubleRegister converted_back = i.TempSimd128Register(0);
2038 __ Cvtqsi2ss(converted_back, output_reg);
2039         // Compare the converted-back value to the rounded value; set
2040         // success_reg to 0 if they differ, or to 1 on success.
2041 __ Cmpeqss(converted_back, rounded);
2042 __ Movq(success_reg, converted_back);
2043 __ And(success_reg, Immediate(1));
2044 } else {
2045 // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2046 if (instr->InputAt(0)->IsFPRegister()) {
2047 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
2048 } else {
2049 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
2050 }
2051 __ Move(success_reg, 1);
2052 Label done;
2053 Label fail;
2054 __ Move(kScratchDoubleReg, float{INT64_MIN});
2055 if (instr->InputAt(0)->IsFPRegister()) {
2056 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
2057 } else {
2058 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
2059 }
2060 // If the input is NaN, then the conversion fails.
2061 __ j(parity_even, &fail, Label::kNear);
2062 // If the input is INT64_MIN, then the conversion succeeds.
2063 __ j(equal, &done, Label::kNear);
2064 __ cmpq(output_reg, Immediate(1));
2065 // If the conversion results in INT64_MIN, but the input was not
2066 // INT64_MIN, then the conversion fails.
2067 __ j(no_overflow, &done, Label::kNear);
2068 __ bind(&fail);
2069 __ Move(success_reg, 0);
2070 __ bind(&done);
2071 }
2072 break;
2073 }
2074 case kSSEFloat64ToInt64: {
2075 Register output_reg = i.OutputRegister(0);
2076 if (instr->OutputCount() == 1) {
2077 if (instr->InputAt(0)->IsFPRegister()) {
2078 __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2079 } else {
2080 __ Cvttsd2siq(output_reg, i.InputOperand(0));
2081 }
2082 break;
2083 }
2084 DCHECK_EQ(2, instr->OutputCount());
2085 Register success_reg = i.OutputRegister(1);
2086 if (CpuFeatures::IsSupported(SSE4_1) || CpuFeatures::IsSupported(AVX)) {
2087 DoubleRegister rounded = kScratchDoubleReg;
2088 if (instr->InputAt(0)->IsFPRegister()) {
2089 __ Roundsd(rounded, i.InputDoubleRegister(0), kRoundToZero);
2090 __ Cvttsd2siq(output_reg, i.InputDoubleRegister(0));
2091 } else {
2092 __ Roundsd(rounded, i.InputOperand(0), kRoundToZero);
2093 // Convert {rounded} instead of the input operand, to avoid another
2094 // load.
2095 __ Cvttsd2siq(output_reg, rounded);
2096 }
2097 DoubleRegister converted_back = i.TempSimd128Register(0);
2098 __ Cvtqsi2sd(converted_back, output_reg);
2099         // Compare the converted-back value to the rounded value; set
2100         // success_reg to 0 if they differ, or to 1 on success.
2101 __ Cmpeqsd(converted_back, rounded);
2102 __ Movq(success_reg, converted_back);
2103 __ And(success_reg, Immediate(1));
2104 } else {
2105 // Less efficient code for non-AVX and non-SSE4_1 CPUs.
2106 if (instr->InputAt(0)->IsFPRegister()) {
2107 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
2108 } else {
2109 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
2110 }
2111 __ Move(success_reg, 1);
2112 Label done;
2113 Label fail;
2114 __ Move(kScratchDoubleReg, double{INT64_MIN});
2115 if (instr->InputAt(0)->IsFPRegister()) {
2116 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2117 } else {
2118 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2119 }
2120 // If the input is NaN, then the conversion fails.
2121 __ j(parity_even, &fail, Label::kNear);
2122 // If the input is INT64_MIN, then the conversion succeeds.
2123 __ j(equal, &done, Label::kNear);
2124 __ cmpq(output_reg, Immediate(1));
2125 // If the conversion results in INT64_MIN, but the input was not
2126 // INT64_MIN, then the conversion fails.
2127 __ j(no_overflow, &done, Label::kNear);
2128 __ bind(&fail);
2129 __ Move(success_reg, 0);
2130 __ bind(&done);
2131 }
2132 break;
2133 }
2134 case kSSEFloat32ToUint64: {
2135 Label fail;
2136 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2137 if (instr->InputAt(0)->IsFPRegister()) {
2138 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2139 } else {
2140 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2141 }
2142 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2143 __ bind(&fail);
2144 break;
2145 }
2146 case kSSEFloat64ToUint64: {
2147 Label fail;
2148 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2149 if (instr->InputAt(0)->IsFPRegister()) {
2150 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2151 } else {
2152 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2153 }
2154 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2155 __ bind(&fail);
2156 break;
2157 }
2158 case kSSEInt32ToFloat64:
2159 if (HasRegisterInput(instr, 0)) {
2160 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2161 } else {
2162 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2163 }
2164 break;
2165 case kSSEInt32ToFloat32:
2166 if (HasRegisterInput(instr, 0)) {
2167 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2168 } else {
2169 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2170 }
2171 break;
2172 case kSSEInt64ToFloat32:
2173 if (HasRegisterInput(instr, 0)) {
2174 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2175 } else {
2176 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2177 }
2178 break;
2179 case kSSEInt64ToFloat64:
2180 if (HasRegisterInput(instr, 0)) {
2181 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2182 } else {
2183 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2184 }
2185 break;
2186 case kSSEUint64ToFloat32:
2187 if (HasRegisterInput(instr, 0)) {
2188 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2189 } else {
2190 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2191 }
2192 break;
2193 case kSSEUint64ToFloat64:
2194 if (HasRegisterInput(instr, 0)) {
2195 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2196 } else {
2197 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2198 }
2199 break;
2200 case kSSEUint32ToFloat64:
2201 if (HasRegisterInput(instr, 0)) {
2202 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2203 } else {
2204 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2205 }
2206 break;
2207 case kSSEUint32ToFloat32:
2208 if (HasRegisterInput(instr, 0)) {
2209 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2210 } else {
2211 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2212 }
2213 break;
2214 case kSSEFloat64ExtractLowWord32:
2215 if (instr->InputAt(0)->IsFPStackSlot()) {
2216 __ movl(i.OutputRegister(), i.InputOperand(0));
2217 } else {
2218 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2219 }
2220 break;
2221 case kSSEFloat64ExtractHighWord32:
2222 if (instr->InputAt(0)->IsFPStackSlot()) {
2223 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
2224 } else {
2225 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
2226 }
2227 break;
2228 case kSSEFloat64InsertLowWord32:
2229 if (HasRegisterInput(instr, 1)) {
2230 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
2231 } else {
2232 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
2233 }
2234 break;
2235 case kSSEFloat64InsertHighWord32:
2236 if (HasRegisterInput(instr, 1)) {
2237 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
2238 } else {
2239 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
2240 }
2241 break;
2242 case kSSEFloat64LoadLowWord32:
2243 if (HasRegisterInput(instr, 0)) {
2244 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2245 } else {
2246 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
2247 }
2248 break;
2249 case kAVXFloat32Cmp: {
2250 CpuFeatureScope avx_scope(tasm(), AVX);
2251 if (instr->InputAt(1)->IsFPRegister()) {
2252 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2253 } else {
2254 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2255 }
2256 break;
2257 }
2258 case kAVXFloat32Add:
2259 ASSEMBLE_AVX_BINOP(vaddss);
2260 break;
2261 case kAVXFloat32Sub:
2262 ASSEMBLE_AVX_BINOP(vsubss);
2263 break;
2264 case kAVXFloat32Mul:
2265 ASSEMBLE_AVX_BINOP(vmulss);
2266 break;
2267 case kAVXFloat32Div:
2268 ASSEMBLE_AVX_BINOP(vdivss);
2269 // Don't delete this mov. It may improve performance on some CPUs,
2270 // when there is a (v)mulss depending on the result.
2271 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2272 break;
2273 case kAVXFloat64Cmp: {
2274 CpuFeatureScope avx_scope(tasm(), AVX);
2275 if (instr->InputAt(1)->IsFPRegister()) {
2276 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2277 } else {
2278 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2279 }
2280 break;
2281 }
2282 case kAVXFloat64Add:
2283 ASSEMBLE_AVX_BINOP(vaddsd);
2284 break;
2285 case kAVXFloat64Sub:
2286 ASSEMBLE_AVX_BINOP(vsubsd);
2287 break;
2288 case kAVXFloat64Mul:
2289 ASSEMBLE_AVX_BINOP(vmulsd);
2290 break;
2291 case kAVXFloat64Div:
2292 ASSEMBLE_AVX_BINOP(vdivsd);
2293 // Don't delete this mov. It may improve performance on some CPUs,
2294 // when there is a (v)mulsd depending on the result.
2295 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2296 break;
2297 case kX64Float32Abs: {
2298 __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2299 kScratchRegister);
2300 break;
2301 }
2302 case kX64Float32Neg: {
2303 __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2304 kScratchRegister);
2305 break;
2306 }
2307 case kX64F64x2Abs:
2308 case kX64Float64Abs: {
2309 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2310 kScratchRegister);
2311 break;
2312 }
2313 case kX64F64x2Neg:
2314 case kX64Float64Neg: {
2315 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2316 kScratchRegister);
2317 break;
2318 }
2319 case kSSEFloat64SilenceNaN:
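           // Subtracting zero turns a signaling NaN into a quiet NaN and leaves
           // all other values unchanged.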
2320 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
2321 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
2322 break;
2323 case kX64Movsxbl:
2324 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2325 ASSEMBLE_MOVX(movsxbl);
2326 __ AssertZeroExtended(i.OutputRegister());
2327 break;
2328 case kX64Movzxbl:
2329 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2330 ASSEMBLE_MOVX(movzxbl);
2331 __ AssertZeroExtended(i.OutputRegister());
2332 break;
2333 case kX64Movsxbq:
2334 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2335 ASSEMBLE_MOVX(movsxbq);
2336 break;
2337 case kX64Movzxbq:
2338 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2339 ASSEMBLE_MOVX(movzxbq);
2340 __ AssertZeroExtended(i.OutputRegister());
2341 break;
2342 case kX64Movb: {
2343 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2344 size_t index = 0;
2345 Operand operand = i.MemoryOperand(&index);
2346 if (HasImmediateInput(instr, index)) {
2347 Immediate value(Immediate(i.InputInt8(index)));
2348 EmitTSANAwareStore<std::memory_order_relaxed>(
2349 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2350 MachineRepresentation::kWord8);
2351 } else {
2352 Register value(i.InputRegister(index));
2353 EmitTSANAwareStore<std::memory_order_relaxed>(
2354 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2355 MachineRepresentation::kWord8);
2356 }
2357 break;
2358 }
2359 case kX64Movsxwl:
2360 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2361 ASSEMBLE_MOVX(movsxwl);
2362 __ AssertZeroExtended(i.OutputRegister());
2363 break;
2364 case kX64Movzxwl:
2365 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2366 ASSEMBLE_MOVX(movzxwl);
2367 __ AssertZeroExtended(i.OutputRegister());
2368 break;
2369 case kX64Movsxwq:
2370 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2371 ASSEMBLE_MOVX(movsxwq);
2372 break;
2373 case kX64Movzxwq:
2374 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2375 ASSEMBLE_MOVX(movzxwq);
2376 __ AssertZeroExtended(i.OutputRegister());
2377 break;
2378 case kX64Movw: {
2379 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2380 size_t index = 0;
2381 Operand operand = i.MemoryOperand(&index);
2382 if (HasImmediateInput(instr, index)) {
2383 Immediate value(Immediate(i.InputInt16(index)));
2384 EmitTSANAwareStore<std::memory_order_relaxed>(
2385 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2386 MachineRepresentation::kWord16);
2387 } else {
2388 Register value(i.InputRegister(index));
2389 EmitTSANAwareStore<std::memory_order_relaxed>(
2390 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2391 MachineRepresentation::kWord16);
2392 }
2393 break;
2394 }
2395 case kX64Movl:
2396 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2397 if (instr->HasOutput()) {
2398 if (HasAddressingMode(instr)) {
2399 Operand address(i.MemoryOperand());
2400 __ movl(i.OutputRegister(), address);
2401 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2402 DetermineStubCallMode(), kInt32Size);
2403 } else {
2404 if (HasRegisterInput(instr, 0)) {
2405 __ movl(i.OutputRegister(), i.InputRegister(0));
2406 } else {
2407 __ movl(i.OutputRegister(), i.InputOperand(0));
2408 }
2409 }
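             // On x64, writing a 32-bit register implicitly zero-extends into the
             // upper 32 bits.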
2410 __ AssertZeroExtended(i.OutputRegister());
2411 } else {
2412 size_t index = 0;
2413 Operand operand = i.MemoryOperand(&index);
2414 if (HasImmediateInput(instr, index)) {
2415 Immediate value(i.InputImmediate(index));
2416 EmitTSANAwareStore<std::memory_order_relaxed>(
2417 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2418 MachineRepresentation::kWord32);
2419 } else {
2420 Register value(i.InputRegister(index));
2421 EmitTSANAwareStore<std::memory_order_relaxed>(
2422 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2423 MachineRepresentation::kWord32);
2424 }
2425 }
2426 break;
2427 case kX64Movsxlq:
2428 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2429 ASSEMBLE_MOVX(movsxlq);
2430 break;
2431 case kX64MovqDecompressTaggedSigned: {
2432 CHECK(instr->HasOutput());
2433 Operand address(i.MemoryOperand());
2434 __ DecompressTaggedSigned(i.OutputRegister(), address);
2435 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2436 DetermineStubCallMode(), kTaggedSize);
2437 break;
2438 }
2439 case kX64MovqDecompressTaggedPointer: {
2440 CHECK(instr->HasOutput());
2441 Operand address(i.MemoryOperand());
2442 __ DecompressTaggedPointer(i.OutputRegister(), address);
2443 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2444 DetermineStubCallMode(), kTaggedSize);
2445 break;
2446 }
2447 case kX64MovqDecompressAnyTagged: {
2448 CHECK(instr->HasOutput());
2449 Operand address(i.MemoryOperand());
2450 __ DecompressAnyTagged(i.OutputRegister(), address);
2451 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2452 DetermineStubCallMode(), kTaggedSize);
2453 break;
2454 }
2455 case kX64MovqCompressTagged: {
2456 CHECK(!instr->HasOutput());
2457 size_t index = 0;
2458 Operand operand = i.MemoryOperand(&index);
2459 if (HasImmediateInput(instr, index)) {
2460 Immediate value(i.InputImmediate(index));
2461 EmitTSANAwareStore<std::memory_order_relaxed>(
2462 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2463 MachineRepresentation::kTagged);
2464 } else {
2465 Register value(i.InputRegister(index));
2466 EmitTSANAwareStore<std::memory_order_relaxed>(
2467 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2468 MachineRepresentation::kTagged);
2469 }
2470 break;
2471 }
2472 case kX64MovqDecodeSandboxedPointer: {
2473 CHECK(instr->HasOutput());
2474 Operand address(i.MemoryOperand());
2475 Register dst = i.OutputRegister();
2476 __ movq(dst, address);
2477 __ DecodeSandboxedPointer(dst);
2478 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2479 DetermineStubCallMode(),
2480 kSystemPointerSize);
2481 break;
2482 }
2483 case kX64MovqEncodeSandboxedPointer: {
2484 CHECK(!instr->HasOutput());
2485 size_t index = 0;
2486 Operand operand = i.MemoryOperand(&index);
2487 CHECK(!HasImmediateInput(instr, index));
2488 Register value(i.InputRegister(index));
2489 EmitTSANAwareStore<std::memory_order_relaxed>(
2490 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2491 MachineRepresentation::kSandboxedPointer);
2492 break;
2493 }
2494 case kX64Movq:
2495 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2496 if (instr->HasOutput()) {
2497 Operand address(i.MemoryOperand());
2498 __ movq(i.OutputRegister(), address);
2499 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2500 DetermineStubCallMode(), kInt64Size);
2501 } else {
2502 size_t index = 0;
2503 Operand operand = i.MemoryOperand(&index);
2504 if (HasImmediateInput(instr, index)) {
2505 Immediate value(i.InputImmediate(index));
2506 EmitTSANAwareStore<std::memory_order_relaxed>(
2507 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2508 MachineRepresentation::kWord64);
2509 } else {
2510 Register value(i.InputRegister(index));
2511 EmitTSANAwareStore<std::memory_order_relaxed>(
2512 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2513 MachineRepresentation::kWord64);
2514 }
2515 }
2516 break;
2517 case kX64Movss:
2518 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2519 if (instr->HasOutput()) {
2520 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2521 } else {
2522 size_t index = 0;
2523 Operand operand = i.MemoryOperand(&index);
2524 __ Movss(operand, i.InputDoubleRegister(index));
2525 }
2526 break;
2527 case kX64Movsd: {
2528 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2529 if (instr->HasOutput()) {
2530 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2531 } else {
2532 size_t index = 0;
2533 Operand operand = i.MemoryOperand(&index);
2534 __ Movsd(operand, i.InputDoubleRegister(index));
2535 }
2536 break;
2537 }
2538 case kX64Movdqu: {
2539 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2540 if (instr->HasOutput()) {
2541 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2542 } else {
2543 size_t index = 0;
2544 Operand operand = i.MemoryOperand(&index);
2545 __ Movdqu(operand, i.InputSimd128Register(index));
2546 }
2547 break;
2548 }
2549 case kX64BitcastFI:
2550 if (instr->InputAt(0)->IsFPStackSlot()) {
2551 __ movl(i.OutputRegister(), i.InputOperand(0));
2552 } else {
2553 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2554 }
2555 break;
2556 case kX64BitcastDL:
2557 if (instr->InputAt(0)->IsFPStackSlot()) {
2558 __ movq(i.OutputRegister(), i.InputOperand(0));
2559 } else {
2560 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2561 }
2562 break;
2563 case kX64BitcastIF:
2564 if (HasRegisterInput(instr, 0)) {
2565 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2566 } else {
2567 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2568 }
2569 break;
2570 case kX64BitcastLD:
2571 if (HasRegisterInput(instr, 0)) {
2572 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2573 } else {
2574 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2575 }
2576 break;
2577 case kX64Lea32: {
2578 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2579       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2580       // and addressing mode just happen to work out. The "addl"/"subl" forms
2581 // in these cases are faster based on measurements.
2582 if (i.InputRegister(0) == i.OutputRegister()) {
2583 if (mode == kMode_MRI) {
2584 int32_t constant_summand = i.InputInt32(1);
2585 DCHECK_NE(0, constant_summand);
2586 if (constant_summand > 0) {
2587 __ addl(i.OutputRegister(), Immediate(constant_summand));
2588 } else {
2589 __ subl(i.OutputRegister(),
2590 Immediate(base::NegateWithWraparound(constant_summand)));
2591 }
2592 } else if (mode == kMode_MR1) {
2593 if (i.InputRegister(1) == i.OutputRegister()) {
2594 __ shll(i.OutputRegister(), Immediate(1));
2595 } else {
2596 __ addl(i.OutputRegister(), i.InputRegister(1));
2597 }
2598 } else if (mode == kMode_M2) {
2599 __ shll(i.OutputRegister(), Immediate(1));
2600 } else if (mode == kMode_M4) {
2601 __ shll(i.OutputRegister(), Immediate(2));
2602 } else if (mode == kMode_M8) {
2603 __ shll(i.OutputRegister(), Immediate(3));
2604 } else {
2605 __ leal(i.OutputRegister(), i.MemoryOperand());
2606 }
2607 } else if (mode == kMode_MR1 &&
2608 i.InputRegister(1) == i.OutputRegister()) {
2609 __ addl(i.OutputRegister(), i.InputRegister(0));
2610 } else {
2611 __ leal(i.OutputRegister(), i.MemoryOperand());
2612 }
2613 __ AssertZeroExtended(i.OutputRegister());
2614 break;
2615 }
2616 case kX64Lea: {
2617 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2618       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2619       // and addressing mode just happen to work out. The "addq"/"subq" forms
2620 // in these cases are faster based on measurements.
2621 if (i.InputRegister(0) == i.OutputRegister()) {
2622 if (mode == kMode_MRI) {
2623 int32_t constant_summand = i.InputInt32(1);
2624 if (constant_summand > 0) {
2625 __ addq(i.OutputRegister(), Immediate(constant_summand));
2626 } else if (constant_summand < 0) {
2627 __ subq(i.OutputRegister(), Immediate(-constant_summand));
2628 }
2629 } else if (mode == kMode_MR1) {
2630 if (i.InputRegister(1) == i.OutputRegister()) {
2631 __ shlq(i.OutputRegister(), Immediate(1));
2632 } else {
2633 __ addq(i.OutputRegister(), i.InputRegister(1));
2634 }
2635 } else if (mode == kMode_M2) {
2636 __ shlq(i.OutputRegister(), Immediate(1));
2637 } else if (mode == kMode_M4) {
2638 __ shlq(i.OutputRegister(), Immediate(2));
2639 } else if (mode == kMode_M8) {
2640 __ shlq(i.OutputRegister(), Immediate(3));
2641 } else {
2642 __ leaq(i.OutputRegister(), i.MemoryOperand());
2643 }
2644 } else if (mode == kMode_MR1 &&
2645 i.InputRegister(1) == i.OutputRegister()) {
2646 __ addq(i.OutputRegister(), i.InputRegister(0));
2647 } else {
2648 __ leaq(i.OutputRegister(), i.MemoryOperand());
2649 }
2650 break;
2651 }
2652 case kX64Dec32:
2653 __ decl(i.OutputRegister());
2654 break;
2655 case kX64Inc32:
2656 __ incl(i.OutputRegister());
2657 break;
2658 case kX64Push: {
2659 int stack_decrement = i.InputInt32(0);
2660 int slots = stack_decrement / kSystemPointerSize;
2661 // Whenever codegen uses pushq, we need to check if stack_decrement
2662 // contains any extra padding and adjust the stack before the pushq.
2663 if (HasImmediateInput(instr, 1)) {
2664 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2665 __ pushq(i.InputImmediate(1));
2666 } else if (HasAddressingMode(instr)) {
2667 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2668 size_t index = 1;
2669 Operand operand = i.MemoryOperand(&index);
2670 __ pushq(operand);
2671 } else {
2672 InstructionOperand* input = instr->InputAt(1);
2673 if (input->IsRegister()) {
2674 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2675 __ pushq(i.InputRegister(1));
2676 } else if (input->IsFloatRegister() || input->IsDoubleRegister()) {
2677 DCHECK_GE(stack_decrement, kSystemPointerSize);
2678 __ AllocateStackSpace(stack_decrement);
2679 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
2680 } else if (input->IsSimd128Register()) {
2681 DCHECK_GE(stack_decrement, kSimd128Size);
2682 __ AllocateStackSpace(stack_decrement);
2683 // TODO(bbudge) Use Movaps when slots are aligned.
2684 __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
2685 } else if (input->IsStackSlot() || input->IsFloatStackSlot() ||
2686 input->IsDoubleStackSlot()) {
2687 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2688 __ pushq(i.InputOperand(1));
2689 } else {
2690 DCHECK(input->IsSimd128StackSlot());
2691 DCHECK_GE(stack_decrement, kSimd128Size);
2692 // TODO(bbudge) Use Movaps when slots are aligned.
2693 __ Movups(kScratchDoubleReg, i.InputOperand(1));
2694 __ AllocateStackSpace(stack_decrement);
2695 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2696 }
2697 }
2698 frame_access_state()->IncreaseSPDelta(slots);
2699 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2700 stack_decrement);
2701 break;
2702 }
2703 case kX64Poke: {
2704 int slot = MiscField::decode(instr->opcode());
2705 if (HasImmediateInput(instr, 0)) {
2706 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2707 } else if (instr->InputAt(0)->IsFPRegister()) {
2708 LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2709 if (op->representation() == MachineRepresentation::kFloat64) {
2710 __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2711 i.InputDoubleRegister(0));
2712 } else {
2713 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2714 __ Movss(Operand(rsp, slot * kSystemPointerSize),
2715 i.InputFloatRegister(0));
2716 }
2717 } else {
2718 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2719 }
2720 break;
2721 }
2722 case kX64Peek: {
2723 int reverse_slot = i.InputInt32(0);
2724 int offset =
2725 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2726 if (instr->OutputAt(0)->IsFPRegister()) {
2727 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2728 if (op->representation() == MachineRepresentation::kFloat64) {
2729 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2730 } else if (op->representation() == MachineRepresentation::kFloat32) {
2731 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2732 } else {
2733 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2734 __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2735 }
2736 } else {
2737 __ movq(i.OutputRegister(), Operand(rbp, offset));
2738 }
2739 break;
2740 }
2741 case kX64F64x2Splat: {
2742 XMMRegister dst = i.OutputSimd128Register();
2743 if (instr->InputAt(0)->IsFPRegister()) {
2744 __ Movddup(dst, i.InputDoubleRegister(0));
2745 } else {
2746 __ Movddup(dst, i.InputOperand(0));
2747 }
2748 break;
2749 }
2750 case kX64F64x2ExtractLane: {
2751 __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2752 i.InputUint8(1));
2753 break;
2754 }
2755 case kX64F64x2ReplaceLane: {
2756 __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2757 i.InputDoubleRegister(2), i.InputInt8(1));
2758 break;
2759 }
2760 case kX64F64x2Sqrt: {
2761 __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2762 break;
2763 }
2764 case kX64F64x2Add: {
2765 ASSEMBLE_SIMD_BINOP(addpd);
2766 break;
2767 }
2768 case kX64F64x2Sub: {
2769 ASSEMBLE_SIMD_BINOP(subpd);
2770 break;
2771 }
2772 case kX64F64x2Mul: {
2773 ASSEMBLE_SIMD_BINOP(mulpd);
2774 break;
2775 }
2776 case kX64F64x2Div: {
2777 ASSEMBLE_SIMD_BINOP(divpd);
2778 break;
2779 }
2780 case kX64F64x2Min: {
2781       // Avoids a move in the non-AVX case when dst == src0.
2782 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2783 __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2784 i.InputSimd128Register(1), kScratchDoubleReg);
2785 break;
2786 }
2787 case kX64F64x2Max: {
2788       // Avoids a move in the non-AVX case when dst == src0.
2789 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2790 __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2791 i.InputSimd128Register(1), kScratchDoubleReg);
2792 break;
2793 }
2794 case kX64F64x2Eq: {
2795 ASSEMBLE_SIMD_BINOP(cmpeqpd);
2796 break;
2797 }
2798 case kX64F64x2Ne: {
2799 ASSEMBLE_SIMD_BINOP(cmpneqpd);
2800 break;
2801 }
2802 case kX64F64x2Lt: {
2803 ASSEMBLE_SIMD_BINOP(cmpltpd);
2804 break;
2805 }
2806 case kX64F64x2Le: {
2807 ASSEMBLE_SIMD_BINOP(cmplepd);
2808 break;
2809 }
2810 case kX64F64x2Qfma: {
2811 __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2812 i.InputSimd128Register(1), i.InputSimd128Register(2),
2813 kScratchDoubleReg);
2814 break;
2815 }
2816 case kX64F64x2Qfms: {
2817 __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2818 i.InputSimd128Register(1), i.InputSimd128Register(2),
2819 kScratchDoubleReg);
2820 break;
2821 }
2822 case kX64F64x2ConvertLowI32x4S: {
2823 __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2824 break;
2825 }
2826 case kX64F64x2ConvertLowI32x4U: {
2827 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2828 i.InputSimd128Register(0), kScratchRegister);
2829 break;
2830 }
2831 case kX64F64x2PromoteLowF32x4: {
2832 if (HasAddressingMode(instr)) {
2833 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2834 __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
2835 } else {
2836 __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2837 }
2838 break;
2839 }
2840 case kX64F32x4DemoteF64x2Zero: {
2841 __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2842 break;
2843 }
2844 case kX64I32x4TruncSatF64x2SZero: {
2845 __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
2846 i.InputSimd128Register(0), kScratchDoubleReg,
2847 kScratchRegister);
2848 break;
2849 }
2850 case kX64I32x4TruncSatF64x2UZero: {
2851 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
2852 i.InputSimd128Register(0), kScratchDoubleReg,
2853 kScratchRegister);
2854 break;
2855 }
2856 case kX64F32x4Splat: {
2857 __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
2858 break;
2859 }
2860 case kX64F32x4ExtractLane: {
2861 __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2862 i.InputUint8(1));
2863 break;
2864 }
2865 case kX64F32x4ReplaceLane: {
2866 // The insertps instruction uses imm8[5:4] to indicate the lane
2867 // that needs to be replaced.
2868 byte select = i.InputInt8(1) << 4 & 0x30;
2869 if (instr->InputAt(2)->IsFPRegister()) {
2870 __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2871 select);
2872 } else {
2873 __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2874 }
2875 break;
2876 }
2877 case kX64F32x4SConvertI32x4: {
2878 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2879 break;
2880 }
2881 case kX64F32x4UConvertI32x4: {
2882 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2883 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2884 XMMRegister dst = i.OutputSimd128Register();
2885 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2886 __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55}); // get lo 16 bits
2887 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
2888 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2889 __ Psrld(dst, byte{1}); // divide by 2 to get in unsigned range
2890 __ Cvtdq2ps(dst, dst); // convert hi exactly
2891 __ Addps(dst, dst); // double hi, exactly
2892 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2893 break;
2894 }
2895 case kX64F32x4Abs: {
2896 XMMRegister dst = i.OutputSimd128Register();
2897 XMMRegister src = i.InputSimd128Register(0);
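           // Build a 0x7FFFFFFF mask (all ones shifted right by one) and AND it in
           // to clear each lane's sign bit.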
2898 if (dst == src) {
2899 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2900 __ Psrld(kScratchDoubleReg, byte{1});
2901 __ Andps(dst, kScratchDoubleReg);
2902 } else {
2903 __ Pcmpeqd(dst, dst);
2904 __ Psrld(dst, byte{1});
2905 __ Andps(dst, src);
2906 }
2907 break;
2908 }
2909 case kX64F32x4Neg: {
2910 XMMRegister dst = i.OutputSimd128Register();
2911 XMMRegister src = i.InputSimd128Register(0);
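           // Build a 0x80000000 mask (all ones shifted left by 31) and XOR it in
           // to flip each lane's sign bit.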
2912 if (dst == src) {
2913 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2914 __ Pslld(kScratchDoubleReg, byte{31});
2915 __ Xorps(dst, kScratchDoubleReg);
2916 } else {
2917 __ Pcmpeqd(dst, dst);
2918 __ Pslld(dst, byte{31});
2919 __ Xorps(dst, src);
2920 }
2921 break;
2922 }
2923 case kX64F32x4Sqrt: {
2924 __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2925 break;
2926 }
2927 case kX64F32x4RecipApprox: {
2928 __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2929 break;
2930 }
2931 case kX64F32x4RecipSqrtApprox: {
2932 __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2933 break;
2934 }
2935 case kX64F32x4Add: {
2936 ASSEMBLE_SIMD_BINOP(addps);
2937 break;
2938 }
2939 case kX64F32x4Sub: {
2940 ASSEMBLE_SIMD_BINOP(subps);
2941 break;
2942 }
2943 case kX64F32x4Mul: {
2944 ASSEMBLE_SIMD_BINOP(mulps);
2945 break;
2946 }
2947 case kX64F32x4Div: {
2948 ASSEMBLE_SIMD_BINOP(divps);
2949 break;
2950 }
2951 case kX64F32x4Min: {
2952 __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2953 i.InputSimd128Register(1), kScratchDoubleReg);
2954 break;
2955 }
2956 case kX64F32x4Max: {
2957 __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2958 i.InputSimd128Register(1), kScratchDoubleReg);
2959 break;
2960 }
2961 case kX64F32x4Eq: {
2962 ASSEMBLE_SIMD_BINOP(cmpeqps);
2963 break;
2964 }
2965 case kX64F32x4Ne: {
2966 ASSEMBLE_SIMD_BINOP(cmpneqps);
2967 break;
2968 }
2969 case kX64F32x4Lt: {
2970 ASSEMBLE_SIMD_BINOP(cmpltps);
2971 break;
2972 }
2973 case kX64F32x4Le: {
2974 ASSEMBLE_SIMD_BINOP(cmpleps);
2975 break;
2976 }
2977 case kX64F32x4Qfma: {
2978 __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2979 i.InputSimd128Register(1), i.InputSimd128Register(2),
2980 kScratchDoubleReg);
2981 break;
2982 }
2983 case kX64F32x4Qfms: {
2984 __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2985 i.InputSimd128Register(1), i.InputSimd128Register(2),
2986 kScratchDoubleReg);
2987 break;
2988 }
2989 case kX64Minps: {
2990 ASSEMBLE_SIMD_BINOP(minps);
2991 break;
2992 }
2993 case kX64Maxps: {
2994 ASSEMBLE_SIMD_BINOP(maxps);
2995 break;
2996 }
2997 case kX64F32x4Round: {
2998 RoundingMode const mode =
2999 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
3000 __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
3001 break;
3002 }
3003 case kX64F64x2Round: {
3004 RoundingMode const mode =
3005 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
3006 __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
3007 break;
3008 }
3009 case kX64Minpd: {
3010 ASSEMBLE_SIMD_BINOP(minpd);
3011 break;
3012 }
3013 case kX64Maxpd: {
3014 ASSEMBLE_SIMD_BINOP(maxpd);
3015 break;
3016 }
3017 case kX64I64x2Splat: {
3018 XMMRegister dst = i.OutputSimd128Register();
3019 if (HasRegisterInput(instr, 0)) {
3020 __ Movq(dst, i.InputRegister(0));
3021 __ Movddup(dst, dst);
3022 } else {
3023 __ Movddup(dst, i.InputOperand(0));
3024 }
3025 break;
3026 }
3027 case kX64I64x2ExtractLane: {
3028 __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3029 break;
3030 }
3031 case kX64I64x2Abs: {
3032 __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
3033 kScratchDoubleReg);
3034 break;
3035 }
3036 case kX64I64x2Neg: {
3037 __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
3038 kScratchDoubleReg);
3039 break;
3040 }
3041 case kX64I64x2BitMask: {
3042 __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
3043 break;
3044 }
3045 case kX64I64x2Shl: {
3046 // Take shift value modulo 2^6.
3047 ASSEMBLE_SIMD_SHIFT(psllq, 6);
3048 break;
3049 }
3050 case kX64I64x2ShrS: {
3051       // TODO(zhin): there is vpsraq, but it requires AVX512
3052 XMMRegister dst = i.OutputSimd128Register();
3053 XMMRegister src = i.InputSimd128Register(0);
3054 if (HasImmediateInput(instr, 1)) {
3055 __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
3056 } else {
3057 __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
3058 i.TempSimd128Register(0), kScratchRegister);
3059 }
3060 break;
3061 }
3062 case kX64I64x2Add: {
3063 ASSEMBLE_SIMD_BINOP(paddq);
3064 break;
3065 }
3066 case kX64I64x2Sub: {
3067 ASSEMBLE_SIMD_BINOP(psubq);
3068 break;
3069 }
3070 case kX64I64x2Mul: {
3071 __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3072 i.InputSimd128Register(1), i.TempSimd128Register(0),
3073 kScratchDoubleReg);
3074 break;
3075 }
3076 case kX64I64x2Eq: {
3077 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3078 ASSEMBLE_SIMD_BINOP(pcmpeqq);
3079 break;
3080 }
3081 case kX64I64x2Ne: {
3082 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
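           // There is no pcmpneqq: compute equality, then invert the result with
           // an all-ones mask.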
3083 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
3084 __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
3085 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3086 break;
3087 }
3088 case kX64I64x2GtS: {
3089 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3090 i.InputSimd128Register(1), kScratchDoubleReg);
3091 break;
3092 }
3093 case kX64I64x2GeS: {
3094 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3095 i.InputSimd128Register(1), kScratchDoubleReg);
3096 break;
3097 }
3098 case kX64I64x2ShrU: {
3099 // Take shift value modulo 2^6.
3100 ASSEMBLE_SIMD_SHIFT(psrlq, 6);
3101 break;
3102 }
3103 case kX64I64x2ExtMulLowI32x4S: {
3104 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3105 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3106 /*is_signed=*/true);
3107 break;
3108 }
3109 case kX64I64x2ExtMulHighI32x4S: {
3110 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3111 i.InputSimd128Register(1), kScratchDoubleReg,
3112 /*low=*/false,
3113 /*is_signed=*/true);
3114 break;
3115 }
3116 case kX64I64x2ExtMulLowI32x4U: {
3117 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3118 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3119 /*is_signed=*/false);
3120 break;
3121 }
3122 case kX64I64x2ExtMulHighI32x4U: {
3123 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3124 i.InputSimd128Register(1), kScratchDoubleReg,
3125 /*low=*/false,
3126 /*is_signed=*/false);
3127 break;
3128 }
3129 case kX64I64x2SConvertI32x4Low: {
3130 __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3131 break;
3132 }
3133 case kX64I64x2SConvertI32x4High: {
3134 __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
3135 i.InputSimd128Register(0));
3136 break;
3137 }
3138 case kX64I64x2UConvertI32x4Low: {
3139 __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3140 break;
3141 }
3142 case kX64I64x2UConvertI32x4High: {
3143 __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
3144 i.InputSimd128Register(0), kScratchDoubleReg);
3145 break;
3146 }
3147 case kX64I32x4Splat: {
3148 XMMRegister dst = i.OutputSimd128Register();
3149 if (HasRegisterInput(instr, 0)) {
3150 __ Movd(dst, i.InputRegister(0));
3151 } else {
3152 // TODO(v8:9198): Pshufd can load from aligned memory once supported.
3153 __ Movd(dst, i.InputOperand(0));
3154 }
3155 __ Pshufd(dst, dst, uint8_t{0x0});
3156 break;
3157 }
3158 case kX64I32x4ExtractLane: {
3159 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3160 break;
3161 }
3162 case kX64I32x4SConvertF32x4: {
3163 __ I32x4SConvertF32x4(i.OutputSimd128Register(),
3164 i.InputSimd128Register(0), kScratchDoubleReg,
3165 kScratchRegister);
3166 break;
3167 }
3168 case kX64I32x4SConvertI16x8Low: {
3169 __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3170 break;
3171 }
3172 case kX64I32x4SConvertI16x8High: {
3173 __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
3174 i.InputSimd128Register(0));
3175 break;
3176 }
3177 case kX64I32x4Neg: {
3178 XMMRegister dst = i.OutputSimd128Register();
3179 XMMRegister src = i.InputSimd128Register(0);
3180 if (dst == src) {
3181 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3182 __ Psignd(dst, kScratchDoubleReg);
3183 } else {
3184 __ Pxor(dst, dst);
3185 __ Psubd(dst, src);
3186 }
3187 break;
3188 }
3189 case kX64I32x4Shl: {
3190 // Take shift value modulo 2^5.
3191 ASSEMBLE_SIMD_SHIFT(pslld, 5);
3192 break;
3193 }
3194 case kX64I32x4ShrS: {
3195 // Take shift value modulo 2^5.
3196 ASSEMBLE_SIMD_SHIFT(psrad, 5);
3197 break;
3198 }
3199 case kX64I32x4Add: {
3200 ASSEMBLE_SIMD_BINOP(paddd);
3201 break;
3202 }
3203 case kX64I32x4Sub: {
3204 ASSEMBLE_SIMD_BINOP(psubd);
3205 break;
3206 }
3207 case kX64I32x4Mul: {
3208 ASSEMBLE_SIMD_BINOP(pmulld);
3209 break;
3210 }
3211 case kX64I32x4MinS: {
3212 ASSEMBLE_SIMD_BINOP(pminsd);
3213 break;
3214 }
3215 case kX64I32x4MaxS: {
3216 ASSEMBLE_SIMD_BINOP(pmaxsd);
3217 break;
3218 }
3219 case kX64I32x4Eq: {
3220 ASSEMBLE_SIMD_BINOP(pcmpeqd);
3221 break;
3222 }
3223 case kX64I32x4Ne: {
3224 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3225 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3226 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3227 break;
3228 }
3229 case kX64I32x4GtS: {
3230 ASSEMBLE_SIMD_BINOP(pcmpgtd);
3231 break;
3232 }
3233 case kX64I32x4GeS: {
3234 XMMRegister dst = i.OutputSimd128Register();
3235 XMMRegister src = i.InputSimd128Register(1);
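           // Signed a >= b is computed as min(a, b) == b.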
3236 __ Pminsd(dst, src);
3237 __ Pcmpeqd(dst, src);
3238 break;
3239 }
3240 case kX64I32x4UConvertF32x4: {
3241 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3242 XMMRegister dst = i.OutputSimd128Register();
3243 XMMRegister tmp = i.TempSimd128Register(0);
3244 XMMRegister tmp2 = i.TempSimd128Register(1);
3245       // NaN -> 0, negative -> 0
3246 __ Pxor(tmp2, tmp2);
3247 __ Maxps(dst, tmp2);
3248       // tmp2: float representation of max_signed
3249 __ Pcmpeqd(tmp2, tmp2);
3250 __ Psrld(tmp2, uint8_t{1}); // 0x7fffffff
3251 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
3252 // tmp: convert (src-max_signed).
3253 // Positive overflow lanes -> 0x7FFFFFFF
3254 // Negative lanes -> 0
3255 __ Movaps(tmp, dst);
3256 __ Subps(tmp, tmp2);
3257 __ Cmpleps(tmp2, tmp);
3258 __ Cvttps2dq(tmp, tmp);
3259 __ Pxor(tmp, tmp2);
3260 __ Pxor(tmp2, tmp2);
3261 __ Pmaxsd(tmp, tmp2);
3262 // convert. Overflow lanes above max_signed will be 0x80000000
3263 __ Cvttps2dq(dst, dst);
3264 // Add (src-max_signed) for overflow lanes.
3265 __ Paddd(dst, tmp);
3266 break;
3267 }
3268 case kX64I32x4UConvertI16x8Low: {
3269 __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3270 break;
3271 }
3272 case kX64I32x4UConvertI16x8High: {
3273 __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
3274 i.InputSimd128Register(0), kScratchDoubleReg);
3275 break;
3276 }
3277 case kX64I32x4ShrU: {
3278 // Take shift value modulo 2^5.
3279 ASSEMBLE_SIMD_SHIFT(psrld, 5);
3280 break;
3281 }
3282 case kX64I32x4MinU: {
3283 ASSEMBLE_SIMD_BINOP(pminud);
3284 break;
3285 }
3286 case kX64I32x4MaxU: {
3287 ASSEMBLE_SIMD_BINOP(pmaxud);
3288 break;
3289 }
3290 case kX64I32x4GtU: {
3291 XMMRegister dst = i.OutputSimd128Register();
3292 XMMRegister src = i.InputSimd128Register(1);
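      // SSE has no packed unsigned > compare; compute dst > src as the
      // inverse of (max(dst, src) == src).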
3293 __ Pmaxud(dst, src);
3294 __ Pcmpeqd(dst, src);
3295 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3296 __ Pxor(dst, kScratchDoubleReg);
3297 break;
3298 }
3299 case kX64I32x4GeU: {
3300 XMMRegister dst = i.OutputSimd128Register();
3301 XMMRegister src = i.InputSimd128Register(1);
3302 __ Pminud(dst, src);
3303 __ Pcmpeqd(dst, src);
3304 break;
3305 }
3306 case kX64I32x4Abs: {
3307 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3308 break;
3309 }
3310 case kX64I32x4BitMask: {
3311 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
3312 break;
3313 }
3314 case kX64I32x4DotI16x8S: {
3315 ASSEMBLE_SIMD_BINOP(pmaddwd);
3316 break;
3317 }
3318 case kX64I32x4ExtAddPairwiseI16x8S: {
3319 __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
3320 i.InputSimd128Register(0), kScratchRegister);
3321 break;
3322 }
3323 case kX64I32x4ExtAddPairwiseI16x8U: {
3324 __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
3325 i.InputSimd128Register(0),
3326 kScratchDoubleReg);
3327 break;
3328 }
3329 case kX64S128Const: {
3330       // Emit code here only for generic constants; the all-zeros and
3331       // all-ones cases are handled separately by the instruction selector.
3332 XMMRegister dst = i.OutputSimd128Register();
3333 uint32_t imm[4] = {};
3334 for (int j = 0; j < 4; j++) {
3335 imm[j] = i.InputUint32(j);
3336 }
3337 SetupSimdImmediateInRegister(tasm(), imm, dst);
3338 break;
3339 }
3340 case kX64S128Zero: {
3341 XMMRegister dst = i.OutputSimd128Register();
3342 __ Pxor(dst, dst);
3343 break;
3344 }
3345 case kX64S128AllOnes: {
3346 XMMRegister dst = i.OutputSimd128Register();
3347 __ Pcmpeqd(dst, dst);
3348 break;
3349 }
3350 case kX64I16x8Splat: {
3351 XMMRegister dst = i.OutputSimd128Register();
3352 if (HasRegisterInput(instr, 0)) {
3353 __ I16x8Splat(dst, i.InputRegister(0));
3354 } else {
3355 __ I16x8Splat(dst, i.InputOperand(0));
3356 }
3357 break;
3358 }
3359 case kX64I16x8ExtractLaneS: {
3360 Register dst = i.OutputRegister();
3361 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
3362 __ movsxwl(dst, dst);
3363 break;
3364 }
3365 case kX64I16x8SConvertI8x16Low: {
3366 __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3367 break;
3368 }
3369 case kX64I16x8SConvertI8x16High: {
3370 __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
3371 i.InputSimd128Register(0));
3372 break;
3373 }
3374 case kX64I16x8Neg: {
3375 XMMRegister dst = i.OutputSimd128Register();
3376 XMMRegister src = i.InputSimd128Register(0);
3377 if (dst == src) {
3378 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3379 __ Psignw(dst, kScratchDoubleReg);
3380 } else {
3381 __ Pxor(dst, dst);
3382 __ Psubw(dst, src);
3383 }
3384 break;
3385 }
3386 case kX64I16x8Shl: {
3387 // Take shift value modulo 2^4.
3388 ASSEMBLE_SIMD_SHIFT(psllw, 4);
3389 break;
3390 }
3391 case kX64I16x8ShrS: {
3392 // Take shift value modulo 2^4.
3393 ASSEMBLE_SIMD_SHIFT(psraw, 4);
3394 break;
3395 }
3396 case kX64I16x8SConvertI32x4: {
3397 ASSEMBLE_SIMD_BINOP(packssdw);
3398 break;
3399 }
3400 case kX64I16x8Add: {
3401 ASSEMBLE_SIMD_BINOP(paddw);
3402 break;
3403 }
3404 case kX64I16x8AddSatS: {
3405 ASSEMBLE_SIMD_BINOP(paddsw);
3406 break;
3407 }
3408 case kX64I16x8Sub: {
3409 ASSEMBLE_SIMD_BINOP(psubw);
3410 break;
3411 }
3412 case kX64I16x8SubSatS: {
3413 ASSEMBLE_SIMD_BINOP(psubsw);
3414 break;
3415 }
3416 case kX64I16x8Mul: {
3417 ASSEMBLE_SIMD_BINOP(pmullw);
3418 break;
3419 }
3420 case kX64I16x8MinS: {
3421 ASSEMBLE_SIMD_BINOP(pminsw);
3422 break;
3423 }
3424 case kX64I16x8MaxS: {
3425 ASSEMBLE_SIMD_BINOP(pmaxsw);
3426 break;
3427 }
3428 case kX64I16x8Eq: {
3429 ASSEMBLE_SIMD_BINOP(pcmpeqw);
3430 break;
3431 }
3432 case kX64I16x8Ne: {
3433 XMMRegister dst = i.OutputSimd128Register();
3434 __ Pcmpeqw(dst, i.InputSimd128Register(1));
3435 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3436 __ Pxor(dst, kScratchDoubleReg);
3437 break;
3438 }
3439 case kX64I16x8GtS: {
3440 ASSEMBLE_SIMD_BINOP(pcmpgtw);
3441 break;
3442 }
3443 case kX64I16x8GeS: {
3444 XMMRegister dst = i.OutputSimd128Register();
3445 XMMRegister src = i.InputSimd128Register(1);
3446 __ Pminsw(dst, src);
3447 __ Pcmpeqw(dst, src);
3448 break;
3449 }
3450 case kX64I16x8UConvertI8x16Low: {
3451 __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3452 break;
3453 }
3454 case kX64I16x8UConvertI8x16High: {
3455 __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
3456 i.InputSimd128Register(0), kScratchDoubleReg);
3457 break;
3458 }
3459 case kX64I16x8ShrU: {
3460 // Take shift value modulo 2^4.
3461 ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3462 break;
3463 }
3464 case kX64I16x8UConvertI32x4: {
3465 ASSEMBLE_SIMD_BINOP(packusdw);
3466 break;
3467 }
3468 case kX64I16x8AddSatU: {
3469 ASSEMBLE_SIMD_BINOP(paddusw);
3470 break;
3471 }
3472 case kX64I16x8SubSatU: {
3473 ASSEMBLE_SIMD_BINOP(psubusw);
3474 break;
3475 }
3476 case kX64I16x8MinU: {
3477 ASSEMBLE_SIMD_BINOP(pminuw);
3478 break;
3479 }
3480 case kX64I16x8MaxU: {
3481 ASSEMBLE_SIMD_BINOP(pmaxuw);
3482 break;
3483 }
3484 case kX64I16x8GtU: {
3485 XMMRegister dst = i.OutputSimd128Register();
3486 XMMRegister src = i.InputSimd128Register(1);
3487 __ Pmaxuw(dst, src);
3488 __ Pcmpeqw(dst, src);
3489 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3490 __ Pxor(dst, kScratchDoubleReg);
3491 break;
3492 }
3493 case kX64I16x8GeU: {
3494 XMMRegister dst = i.OutputSimd128Register();
3495 XMMRegister src = i.InputSimd128Register(1);
3496 __ Pminuw(dst, src);
3497 __ Pcmpeqw(dst, src);
3498 break;
3499 }
3500 case kX64I16x8RoundingAverageU: {
3501 ASSEMBLE_SIMD_BINOP(pavgw);
3502 break;
3503 }
3504 case kX64I16x8Abs: {
3505 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3506 break;
3507 }
3508 case kX64I16x8BitMask: {
3509 Register dst = i.OutputRegister();
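      // There is no 16-bit movemask. Pack the input words into the upper
      // eight bytes of the scratch register with signed saturation (which
      // preserves the sign bits), take the byte sign mask, and shift out the
      // low eight bits that came from the scratch register's old contents.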
3510 __ Packsswb(kScratchDoubleReg, i.InputSimd128Register(0));
3511 __ Pmovmskb(dst, kScratchDoubleReg);
3512 __ shrq(dst, Immediate(8));
3513 break;
3514 }
3515 case kX64I16x8ExtMulLowI8x16S: {
3516 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3517 i.InputSimd128Register(1), kScratchDoubleReg,
3518 /*is_signed=*/true);
3519 break;
3520 }
3521 case kX64I16x8ExtMulHighI8x16S: {
3522 __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3523 i.InputSimd128Register(1), kScratchDoubleReg);
3524 break;
3525 }
3526 case kX64I16x8ExtMulLowI8x16U: {
3527 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3528 i.InputSimd128Register(1), kScratchDoubleReg,
3529 /*is_signed=*/false);
3530 break;
3531 }
3532 case kX64I16x8ExtMulHighI8x16U: {
3533 __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
3534 i.InputSimd128Register(1), kScratchDoubleReg);
3535 break;
3536 }
3537 case kX64I16x8ExtAddPairwiseI8x16S: {
3538 __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
3539 i.InputSimd128Register(0), kScratchDoubleReg,
3540 kScratchRegister);
3541 break;
3542 }
3543 case kX64I16x8ExtAddPairwiseI8x16U: {
3544 __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
3545 i.InputSimd128Register(0), kScratchRegister);
3546 break;
3547 }
3548 case kX64I16x8Q15MulRSatS: {
3549 __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3550 i.InputSimd128Register(1), kScratchDoubleReg);
3551 break;
3552 }
3553 case kX64I8x16Splat: {
3554 XMMRegister dst = i.OutputSimd128Register();
3555 if (HasRegisterInput(instr, 0)) {
3556 __ I8x16Splat(dst, i.InputRegister(0), kScratchDoubleReg);
3557 } else {
3558 __ I8x16Splat(dst, i.InputOperand(0), kScratchDoubleReg);
3559 }
3560 break;
3561 }
3562 case kX64Pextrb: {
3563 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3564 size_t index = 0;
3565 if (HasAddressingMode(instr)) {
3566 Operand operand = i.MemoryOperand(&index);
3567 __ Pextrb(operand, i.InputSimd128Register(index),
3568 i.InputUint8(index + 1));
3569 } else {
3570 __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3571 i.InputUint8(1));
3572 }
3573 break;
3574 }
3575 case kX64Pextrw: {
3576 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3577 size_t index = 0;
3578 if (HasAddressingMode(instr)) {
3579 Operand operand = i.MemoryOperand(&index);
3580 __ Pextrw(operand, i.InputSimd128Register(index),
3581 i.InputUint8(index + 1));
3582 } else {
3583 __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3584 i.InputUint8(1));
3585 }
3586 break;
3587 }
3588 case kX64I8x16ExtractLaneS: {
3589 Register dst = i.OutputRegister();
3590 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3591 __ movsxbl(dst, dst);
3592 break;
3593 }
3594 case kX64Pinsrb: {
3595 ASSEMBLE_PINSR(Pinsrb);
3596 break;
3597 }
3598 case kX64Pinsrw: {
3599 ASSEMBLE_PINSR(Pinsrw);
3600 break;
3601 }
3602 case kX64Pinsrd: {
3603 ASSEMBLE_PINSR(Pinsrd);
3604 break;
3605 }
3606 case kX64Pinsrq: {
3607 ASSEMBLE_PINSR(Pinsrq);
3608 break;
3609 }
3610 case kX64I8x16SConvertI16x8: {
3611 ASSEMBLE_SIMD_BINOP(packsswb);
3612 break;
3613 }
3614 case kX64I8x16Neg: {
3615 XMMRegister dst = i.OutputSimd128Register();
3616 XMMRegister src = i.InputSimd128Register(0);
3617 if (dst == src) {
3618 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3619 __ Psignb(dst, kScratchDoubleReg);
3620 } else {
3621 __ Pxor(dst, dst);
3622 __ Psubb(dst, src);
3623 }
3624 break;
3625 }
3626 case kX64I8x16Shl: {
3627 XMMRegister dst = i.OutputSimd128Register();
3628 XMMRegister src = i.InputSimd128Register(0);
3629 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3630 if (HasImmediateInput(instr, 1)) {
3631 __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
3632 kScratchDoubleReg);
3633 } else {
3634 __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
3635 kScratchDoubleReg, i.TempSimd128Register(0));
3636 }
3637 break;
3638 }
3639 case kX64I8x16ShrS: {
3640 XMMRegister dst = i.OutputSimd128Register();
3641 XMMRegister src = i.InputSimd128Register(0);
3642 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3643 if (HasImmediateInput(instr, 1)) {
3644 __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
3645 } else {
3646 __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
3647 kScratchDoubleReg, i.TempSimd128Register(0));
3648 }
3649 break;
3650 }
3651 case kX64I8x16Add: {
3652 ASSEMBLE_SIMD_BINOP(paddb);
3653 break;
3654 }
3655 case kX64I8x16AddSatS: {
3656 ASSEMBLE_SIMD_BINOP(paddsb);
3657 break;
3658 }
3659 case kX64I8x16Sub: {
3660 ASSEMBLE_SIMD_BINOP(psubb);
3661 break;
3662 }
3663 case kX64I8x16SubSatS: {
3664 ASSEMBLE_SIMD_BINOP(psubsb);
3665 break;
3666 }
3667 case kX64I8x16MinS: {
3668 ASSEMBLE_SIMD_BINOP(pminsb);
3669 break;
3670 }
3671 case kX64I8x16MaxS: {
3672 ASSEMBLE_SIMD_BINOP(pmaxsb);
3673 break;
3674 }
3675 case kX64I8x16Eq: {
3676 ASSEMBLE_SIMD_BINOP(pcmpeqb);
3677 break;
3678 }
3679 case kX64I8x16Ne: {
3680 XMMRegister dst = i.OutputSimd128Register();
3681 __ Pcmpeqb(dst, i.InputSimd128Register(1));
3682 __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3683 __ Pxor(dst, kScratchDoubleReg);
3684 break;
3685 }
3686 case kX64I8x16GtS: {
3687 ASSEMBLE_SIMD_BINOP(pcmpgtb);
3688 break;
3689 }
3690 case kX64I8x16GeS: {
3691 XMMRegister dst = i.OutputSimd128Register();
3692 XMMRegister src = i.InputSimd128Register(1);
3693 __ Pminsb(dst, src);
3694 __ Pcmpeqb(dst, src);
3695 break;
3696 }
3697 case kX64I8x16UConvertI16x8: {
3698 ASSEMBLE_SIMD_BINOP(packuswb);
3699 break;
3700 }
3701 case kX64I8x16ShrU: {
3702 XMMRegister dst = i.OutputSimd128Register();
3703 XMMRegister src = i.InputSimd128Register(0);
3704 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3705 if (HasImmediateInput(instr, 1)) {
3706 __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
3707 kScratchDoubleReg);
3708 } else {
3709 __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
3710 kScratchDoubleReg, i.TempSimd128Register(0));
3711 }
3712 break;
3713 }
3714 case kX64I8x16AddSatU: {
3715 ASSEMBLE_SIMD_BINOP(paddusb);
3716 break;
3717 }
3718 case kX64I8x16SubSatU: {
3719 ASSEMBLE_SIMD_BINOP(psubusb);
3720 break;
3721 }
3722 case kX64I8x16MinU: {
3723 ASSEMBLE_SIMD_BINOP(pminub);
3724 break;
3725 }
3726 case kX64I8x16MaxU: {
3727 ASSEMBLE_SIMD_BINOP(pmaxub);
3728 break;
3729 }
3730 case kX64I8x16GtU: {
3731 XMMRegister dst = i.OutputSimd128Register();
3732 XMMRegister src = i.InputSimd128Register(1);
3733 __ Pmaxub(dst, src);
3734 __ Pcmpeqb(dst, src);
3735 __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3736 __ Pxor(dst, kScratchDoubleReg);
3737 break;
3738 }
3739 case kX64I8x16GeU: {
3740 XMMRegister dst = i.OutputSimd128Register();
3741 XMMRegister src = i.InputSimd128Register(1);
3742 __ Pminub(dst, src);
3743 __ Pcmpeqb(dst, src);
3744 break;
3745 }
3746 case kX64I8x16RoundingAverageU: {
3747 ASSEMBLE_SIMD_BINOP(pavgb);
3748 break;
3749 }
3750 case kX64I8x16Abs: {
3751 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3752 break;
3753 }
3754 case kX64I8x16BitMask: {
3755 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3756 break;
3757 }
3758 case kX64I32x4ExtMulLowI16x8S: {
3759 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3760 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3761 /*is_signed=*/true);
3762 break;
3763 }
3764 case kX64I32x4ExtMulHighI16x8S: {
3765 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3766 i.InputSimd128Register(1), kScratchDoubleReg,
3767 /*low=*/false,
3768 /*is_signed=*/true);
3769 break;
3770 }
3771 case kX64I32x4ExtMulLowI16x8U: {
3772 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3773 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3774 /*is_signed=*/false);
3775 break;
3776 }
3777 case kX64I32x4ExtMulHighI16x8U: {
3778 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3779 i.InputSimd128Register(1), kScratchDoubleReg,
3780 /*low=*/false,
3781 /*is_signed=*/false);
3782 break;
3783 }
3784 case kX64S128And: {
3785 ASSEMBLE_SIMD_BINOP(pand);
3786 break;
3787 }
3788 case kX64S128Or: {
3789 ASSEMBLE_SIMD_BINOP(por);
3790 break;
3791 }
3792 case kX64S128Xor: {
3793 ASSEMBLE_SIMD_BINOP(pxor);
3794 break;
3795 }
3796 case kX64S128Not: {
3797 __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
3798 kScratchDoubleReg);
3799 break;
3800 }
3801 case kX64S128Select: {
3802 __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
3803 i.InputSimd128Register(1), i.InputSimd128Register(2),
3804 kScratchDoubleReg);
3805 break;
3806 }
3807 case kX64S128AndNot: {
3808 XMMRegister dst = i.OutputSimd128Register();
3809 DCHECK_EQ(dst, i.InputSimd128Register(0));
3810 // The inputs have been inverted by instruction selector, so we can call
3811 // andnps here without any modifications.
3812 __ Andnps(dst, i.InputSimd128Register(1));
3813 break;
3814 }
3815 case kX64I8x16Swizzle: {
3816 __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
3817 i.InputSimd128Register(1), kScratchDoubleReg,
3818 kScratchRegister, MiscField::decode(instr->opcode()));
3819 break;
3820 }
3821 case kX64I8x16Shuffle: {
3822 XMMRegister dst = i.OutputSimd128Register();
3823 XMMRegister tmp_simd = i.TempSimd128Register(0);
3824 DCHECK_NE(tmp_simd, i.InputSimd128Register(0));
3825 if (instr->InputCount() == 5) { // only one input operand
3826 uint32_t mask[4] = {};
3827 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3828 for (int j = 4; j > 0; j--) {
3829 mask[j - 1] = i.InputUint32(j);
3830 }
3831
3832 SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3833 __ Pshufb(dst, tmp_simd);
3834 } else { // two input operands
3835 DCHECK_NE(tmp_simd, i.InputSimd128Register(1));
3836 DCHECK_EQ(6, instr->InputCount());
3837 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 0);
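        // Build a pshufb mask that selects the bytes coming from the first
        // input; lane indices >= 16 are replaced with 0x80 so pshufb zeroes
        // those bytes.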
3838 uint32_t mask1[4] = {};
3839 for (int j = 5; j > 1; j--) {
3840 uint32_t lanes = i.InputUint32(j);
3841 for (int k = 0; k < 32; k += 8) {
3842 uint8_t lane = lanes >> k;
3843 mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3844 }
3845 }
3846 SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3847 __ Pshufb(kScratchDoubleReg, tmp_simd);
3848 uint32_t mask2[4] = {};
3849 if (instr->InputAt(1)->IsSimd128Register()) {
3850 XMMRegister src1 = i.InputSimd128Register(1);
3851 if (src1 != dst) __ Movdqa(dst, src1);
3852 } else {
3853 __ Movdqu(dst, i.InputOperand(1));
3854 }
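        // Second mask: select the bytes coming from the second input (indices
        // 16..31, masked down to 0..15); lanes already taken from the first
        // input become 0x80 and are zeroed, then the two halves are OR'ed.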
3855 for (int j = 5; j > 1; j--) {
3856 uint32_t lanes = i.InputUint32(j);
3857 for (int k = 0; k < 32; k += 8) {
3858 uint8_t lane = lanes >> k;
3859 mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3860 }
3861 }
3862 SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3863 __ Pshufb(dst, tmp_simd);
3864 __ Por(dst, kScratchDoubleReg);
3865 }
3866 break;
3867 }
3868 case kX64I8x16Popcnt: {
3869 __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
3870 i.TempSimd128Register(0), kScratchDoubleReg,
3871 kScratchRegister);
3872 break;
3873 }
3874 case kX64S128Load8Splat: {
3875 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3876 __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3877 kScratchDoubleReg);
3878 break;
3879 }
3880 case kX64S128Load16Splat: {
3881 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3882 __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3883 kScratchDoubleReg);
3884 break;
3885 }
3886 case kX64S128Load32Splat: {
3887 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3888 __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
3889 break;
3890 }
3891 case kX64S128Load64Splat: {
3892 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3893 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3894 break;
3895 }
3896 case kX64S128Load8x8S: {
3897 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3898 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3899 break;
3900 }
3901 case kX64S128Load8x8U: {
3902 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3903 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3904 break;
3905 }
3906 case kX64S128Load16x4S: {
3907 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3908 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3909 break;
3910 }
3911 case kX64S128Load16x4U: {
3912 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3913 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3914 break;
3915 }
3916 case kX64S128Load32x2S: {
3917 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3918 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3919 break;
3920 }
3921 case kX64S128Load32x2U: {
3922 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3923 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3924 break;
3925 }
3926 case kX64S128Store32Lane: {
3927 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3928 size_t index = 0;
3929 Operand operand = i.MemoryOperand(&index);
3930 uint8_t lane = i.InputUint8(index + 1);
3931 __ S128Store32Lane(operand, i.InputSimd128Register(index), lane);
3932 break;
3933 }
3934 case kX64S128Store64Lane: {
3935 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3936 size_t index = 0;
3937 Operand operand = i.MemoryOperand(&index);
3938 uint8_t lane = i.InputUint8(index + 1);
3939 __ S128Store64Lane(operand, i.InputSimd128Register(index), lane);
3940 break;
3941 }
3942 case kX64Shufps: {
3943 __ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3944 i.InputSimd128Register(1), i.InputUint8(2));
3945 break;
3946 }
3947 case kX64S32x4Rotate: {
3948 XMMRegister dst = i.OutputSimd128Register();
3949 XMMRegister src = i.InputSimd128Register(0);
3950 uint8_t mask = i.InputUint8(1);
3951 if (dst == src) {
3952 // 1-byte shorter encoding than pshufd.
3953 __ Shufps(dst, src, src, mask);
3954 } else {
3955 __ Pshufd(dst, src, mask);
3956 }
3957 break;
3958 }
3959 case kX64S32x4Swizzle: {
3960 DCHECK_EQ(2, instr->InputCount());
3961 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3962 i.InputUint8(1));
3963 break;
3964 }
3965 case kX64S32x4Shuffle: {
3966 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3967 uint8_t shuffle = i.InputUint8(2);
3968 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3969 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3970 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3971 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3972 break;
3973 }
3974 case kX64S16x8Blend: {
3975 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
3976 break;
3977 }
3978 case kX64S16x8HalfShuffle1: {
3979 XMMRegister dst = i.OutputSimd128Register();
3980 uint8_t mask_lo = i.InputUint8(1);
3981 uint8_t mask_hi = i.InputUint8(2);
3982 if (mask_lo != 0xe4) {
3983 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, mask_lo);
3984 if (mask_hi != 0xe4) __ Pshufhw(dst, dst, mask_hi);
3985 } else {
3986 DCHECK_NE(mask_hi, 0xe4);
3987 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, mask_hi);
3988 }
3989 break;
3990 }
3991 case kX64S16x8HalfShuffle2: {
3992 XMMRegister dst = i.OutputSimd128Register();
3993 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3994 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3995 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3996 __ Pshufhw(dst, dst, i.InputUint8(3));
3997 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3998 break;
3999 }
4000 case kX64S8x16Alignr: {
4001 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
4002 break;
4003 }
4004 case kX64S16x8Dup: {
4005 XMMRegister dst = i.OutputSimd128Register();
4006 uint8_t lane = i.InputInt8(1) & 0x7;
4007 uint8_t lane4 = lane & 0x3;
4008 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
4009 if (lane < 4) {
4010 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
4011 __ Punpcklqdq(dst, dst);
4012 } else {
4013 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
4014 __ Punpckhqdq(dst, dst);
4015 }
4016 break;
4017 }
4018 case kX64S8x16Dup: {
4019 XMMRegister dst = i.OutputSimd128Register();
4020 uint8_t lane = i.InputInt8(1) & 0xf;
4021 DCHECK_EQ(dst, i.InputSimd128Register(0));
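      // Broadcast a single byte lane: interleave the relevant half with
      // itself so the byte fills a word, broadcast that word across its half
      // with pshuflw/pshufhw, then duplicate the qword across the register.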
4022 if (lane < 8) {
4023 __ Punpcklbw(dst, dst);
4024 } else {
4025 __ Punpckhbw(dst, dst);
4026 }
4027 lane &= 0x7;
4028 uint8_t lane4 = lane & 0x3;
4029 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
4030 if (lane < 4) {
4031 __ Pshuflw(dst, dst, half_dup);
4032 __ Punpcklqdq(dst, dst);
4033 } else {
4034 __ Pshufhw(dst, dst, half_dup);
4035 __ Punpckhqdq(dst, dst);
4036 }
4037 break;
4038 }
4039 case kX64S64x2UnpackHigh:
4040 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
4041 break;
4042 case kX64S32x4UnpackHigh:
4043 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
4044 break;
4045 case kX64S16x8UnpackHigh:
4046 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
4047 break;
4048 case kX64S8x16UnpackHigh:
4049 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
4050 break;
4051 case kX64S64x2UnpackLow:
4052 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
4053 break;
4054 case kX64S32x4UnpackLow:
4055 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
4056 break;
4057 case kX64S16x8UnpackLow:
4058 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
4059 break;
4060 case kX64S8x16UnpackLow:
4061 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
4062 break;
4063 case kX64S16x8UnzipHigh: {
4064 XMMRegister dst = i.OutputSimd128Register();
4065 XMMRegister src2 = dst;
4066 DCHECK_EQ(dst, i.InputSimd128Register(0));
4067 if (instr->InputCount() == 2) {
4068 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4069 __ Psrld(kScratchDoubleReg, byte{16});
4070 src2 = kScratchDoubleReg;
4071 }
4072 __ Psrld(dst, byte{16});
4073 __ Packusdw(dst, src2);
4074 break;
4075 }
4076 case kX64S16x8UnzipLow: {
4077 XMMRegister dst = i.OutputSimd128Register();
4078 XMMRegister src2 = dst;
4079 DCHECK_EQ(dst, i.InputSimd128Register(0));
4080 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
4081 if (instr->InputCount() == 2) {
4082 ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
4083 src2 = kScratchDoubleReg;
4084 }
4085 __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
4086 __ Packusdw(dst, src2);
4087 break;
4088 }
4089 case kX64S8x16UnzipHigh: {
4090 XMMRegister dst = i.OutputSimd128Register();
4091 XMMRegister src2 = dst;
4092 DCHECK_EQ(dst, i.InputSimd128Register(0));
4093 if (instr->InputCount() == 2) {
4094 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4095 __ Psrlw(kScratchDoubleReg, byte{8});
4096 src2 = kScratchDoubleReg;
4097 }
4098 __ Psrlw(dst, byte{8});
4099 __ Packuswb(dst, src2);
4100 break;
4101 }
4102 case kX64S8x16UnzipLow: {
4103 XMMRegister dst = i.OutputSimd128Register();
4104 XMMRegister src2 = dst;
4105 DCHECK_EQ(dst, i.InputSimd128Register(0));
4106 if (instr->InputCount() == 2) {
4107 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4108 __ Psllw(kScratchDoubleReg, byte{8});
4109 __ Psrlw(kScratchDoubleReg, byte{8});
4110 src2 = kScratchDoubleReg;
4111 }
4112 __ Psllw(dst, byte{8});
4113 __ Psrlw(dst, byte{8});
4114 __ Packuswb(dst, src2);
4115 break;
4116 }
4117 case kX64S8x16TransposeLow: {
4118 XMMRegister dst = i.OutputSimd128Register();
4119 DCHECK_EQ(dst, i.InputSimd128Register(0));
4120 __ Psllw(dst, byte{8});
4121 if (instr->InputCount() == 1) {
4122 __ Movdqa(kScratchDoubleReg, dst);
4123 } else {
4124 DCHECK_EQ(2, instr->InputCount());
4125 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4126 __ Psllw(kScratchDoubleReg, byte{8});
4127 }
4128 __ Psrlw(dst, byte{8});
4129 __ Por(dst, kScratchDoubleReg);
4130 break;
4131 }
4132 case kX64S8x16TransposeHigh: {
4133 XMMRegister dst = i.OutputSimd128Register();
4134 DCHECK_EQ(dst, i.InputSimd128Register(0));
4135 __ Psrlw(dst, byte{8});
4136 if (instr->InputCount() == 1) {
4137 __ Movdqa(kScratchDoubleReg, dst);
4138 } else {
4139 DCHECK_EQ(2, instr->InputCount());
4140 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4141 __ Psrlw(kScratchDoubleReg, byte{8});
4142 }
4143 __ Psllw(kScratchDoubleReg, byte{8});
4144 __ Por(dst, kScratchDoubleReg);
4145 break;
4146 }
4147 case kX64S8x8Reverse:
4148 case kX64S8x4Reverse:
4149 case kX64S8x2Reverse: {
4150 DCHECK_EQ(1, instr->InputCount());
4151 XMMRegister dst = i.OutputSimd128Register();
4152 DCHECK_EQ(dst, i.InputSimd128Register(0));
4153 if (arch_opcode != kX64S8x2Reverse) {
4154 // First shuffle words into position.
4155 uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
4156 __ Pshuflw(dst, dst, shuffle_mask);
4157 __ Pshufhw(dst, dst, shuffle_mask);
4158 }
4159 __ Movdqa(kScratchDoubleReg, dst);
4160 __ Psrlw(kScratchDoubleReg, byte{8});
4161 __ Psllw(dst, byte{8});
4162 __ Por(dst, kScratchDoubleReg);
4163 break;
4164 }
4165 case kX64V128AnyTrue: {
4166 Register dst = i.OutputRegister();
4167 XMMRegister src = i.InputSimd128Register(0);
4168
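      // ptest sets ZF iff the register is all zeros; materialize "any lane
      // non-zero" from the inverse of that flag.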
4169 __ xorq(dst, dst);
4170 __ Ptest(src, src);
4171 __ setcc(not_equal, dst);
4172 break;
4173 }
4174 // Need to split up all the different lane structures because the
4175 // comparison instruction used matters, e.g. given 0xff00, pcmpeqb returns
4176 // 0x0011, pcmpeqw returns 0x0000, ptest will set ZF to 0 and 1
4177 // respectively.
4178 case kX64I64x2AllTrue: {
4179 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
4180 break;
4181 }
4182 case kX64I32x4AllTrue: {
4183 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
4184 break;
4185 }
4186 case kX64I16x8AllTrue: {
4187 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
4188 break;
4189 }
4190 case kX64I8x16AllTrue: {
4191 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
4192 break;
4193 }
4194 case kX64Pblendvb: {
4195 __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
4196 i.InputSimd128Register(1), i.InputSimd128Register(2));
4197 break;
4198 }
4199 case kX64I32x4TruncF64x2UZero: {
4200 __ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
4201 i.InputSimd128Register(0), kScratchRegister,
4202 kScratchDoubleReg);
4203 break;
4204 }
4205 case kX64I32x4TruncF32x4U: {
4206 __ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
4207 kScratchRegister, kScratchDoubleReg);
4208 break;
4209 }
4210 case kX64Cvttps2dq: {
4211 __ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
4212 break;
4213 }
4214 case kX64Cvttpd2dq: {
4215 __ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
4216 break;
4217 }
4218 case kAtomicStoreWord8: {
4219 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
4220 break;
4221 }
4222 case kAtomicStoreWord16: {
4223 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord16);
4224 break;
4225 }
4226 case kAtomicStoreWord32: {
4227 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord32);
4228 break;
4229 }
4230 case kX64Word64AtomicStoreWord64: {
4231 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord64);
4232 break;
4233 }
4234 case kAtomicExchangeInt8: {
4235 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
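    // xchg with a memory operand has an implicit lock prefix; the old memory
    // value lands in the input register and is then sign-extended.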
4236 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4237 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
4238 break;
4239 }
4240 case kAtomicExchangeUint8: {
4241 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4242 switch (AtomicWidthField::decode(opcode)) {
4243 case AtomicWidth::kWord32:
4244 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
4245 break;
4246 case AtomicWidth::kWord64:
4247 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4248 break;
4249 }
4250 break;
4251 }
4252 case kAtomicExchangeInt16: {
4253 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4254 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4255 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
4256 break;
4257 }
4258 case kAtomicExchangeUint16: {
4259 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4260 switch (AtomicWidthField::decode(opcode)) {
4261 case AtomicWidth::kWord32:
4262 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
4263 break;
4264 case AtomicWidth::kWord64:
4265 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4266 break;
4267 }
4268 break;
4269 }
4270 case kAtomicExchangeWord32: {
4271 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4272 break;
4273 }
4274 case kAtomicCompareExchangeInt8: {
4275 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
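    // cmpxchg implicitly compares against rax and leaves the observed memory
    // value there; sign-extend that value for the 32-bit result.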
4276 __ lock();
4277 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4278 __ movsxbl(rax, rax);
4279 break;
4280 }
4281 case kAtomicCompareExchangeUint8: {
4282 __ lock();
4283 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4284 switch (AtomicWidthField::decode(opcode)) {
4285 case AtomicWidth::kWord32:
4286 __ movzxbl(rax, rax);
4287 break;
4288 case AtomicWidth::kWord64:
4289 __ movzxbq(rax, rax);
4290 break;
4291 }
4292 break;
4293 }
4294 case kAtomicCompareExchangeInt16: {
4295 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4296 __ lock();
4297 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4298 __ movsxwl(rax, rax);
4299 break;
4300 }
4301 case kAtomicCompareExchangeUint16: {
4302 __ lock();
4303 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4304 switch (AtomicWidthField::decode(opcode)) {
4305 case AtomicWidth::kWord32:
4306 __ movzxwl(rax, rax);
4307 break;
4308 case AtomicWidth::kWord64:
4309 __ movzxwq(rax, rax);
4310 break;
4311 }
4312 break;
4313 }
4314 case kAtomicCompareExchangeWord32: {
4315 __ lock();
4316 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4317 if (AtomicWidthField::decode(opcode) == AtomicWidth::kWord64) {
4318         // Zero-extend the 32-bit value to 64 bits.
4319 __ movl(rax, rax);
4320 }
4321 break;
4322 }
4323 case kX64Word64AtomicExchangeUint64: {
4324 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4325 break;
4326 }
4327 case kX64Word64AtomicCompareExchangeUint64: {
4328 __ lock();
4329 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4330 break;
4331 }
4332 #define ATOMIC_BINOP_CASE(op, inst32, inst64) \
4333 case kAtomic##op##Int8: \
4334 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4335 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
4336 __ movsxbl(rax, rax); \
4337 break; \
4338 case kAtomic##op##Uint8: \
4339 switch (AtomicWidthField::decode(opcode)) { \
4340 case AtomicWidth::kWord32: \
4341 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
4342 __ movzxbl(rax, rax); \
4343 break; \
4344 case AtomicWidth::kWord64: \
4345 ASSEMBLE_ATOMIC64_BINOP(inst64, movb, cmpxchgb); \
4346 __ movzxbq(rax, rax); \
4347 break; \
4348 } \
4349 break; \
4350 case kAtomic##op##Int16: \
4351 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4352 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
4353 __ movsxwl(rax, rax); \
4354 break; \
4355 case kAtomic##op##Uint16: \
4356 switch (AtomicWidthField::decode(opcode)) { \
4357 case AtomicWidth::kWord32: \
4358 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
4359 __ movzxwl(rax, rax); \
4360 break; \
4361 case AtomicWidth::kWord64: \
4362 ASSEMBLE_ATOMIC64_BINOP(inst64, movw, cmpxchgw); \
4363 __ movzxwq(rax, rax); \
4364 break; \
4365 } \
4366 break; \
4367 case kAtomic##op##Word32: \
4368 switch (AtomicWidthField::decode(opcode)) { \
4369 case AtomicWidth::kWord32: \
4370 ASSEMBLE_ATOMIC_BINOP(inst32, movl, cmpxchgl); \
4371 break; \
4372 case AtomicWidth::kWord64: \
4373 ASSEMBLE_ATOMIC64_BINOP(inst64, movl, cmpxchgl); \
4374 break; \
4375 } \
4376 break; \
4377 case kX64Word64Atomic##op##Uint64: \
4378 ASSEMBLE_ATOMIC64_BINOP(inst64, movq, cmpxchgq); \
4379 break;
4380 ATOMIC_BINOP_CASE(Add, addl, addq)
4381 ATOMIC_BINOP_CASE(Sub, subl, subq)
4382 ATOMIC_BINOP_CASE(And, andl, andq)
4383 ATOMIC_BINOP_CASE(Or, orl, orq)
4384 ATOMIC_BINOP_CASE(Xor, xorl, xorq)
4385 #undef ATOMIC_BINOP_CASE
4386
4387 case kAtomicLoadInt8:
4388 case kAtomicLoadUint8:
4389 case kAtomicLoadInt16:
4390 case kAtomicLoadUint16:
4391 case kAtomicLoadWord32:
4392 UNREACHABLE(); // Won't be generated by instruction selector.
4393 }
4394 return kSuccess;
4395 }  // NOLINT(readability/fn_size)
4396
4397 #undef ASSEMBLE_PINSR
4398 #undef ASSEMBLE_UNOP
4399 #undef ASSEMBLE_BINOP
4400 #undef ASSEMBLE_COMPARE
4401 #undef ASSEMBLE_MULT
4402 #undef ASSEMBLE_SHIFT
4403 #undef ASSEMBLE_MOVX
4404 #undef ASSEMBLE_SSE_BINOP
4405 #undef ASSEMBLE_SSE_UNOP
4406 #undef ASSEMBLE_AVX_BINOP
4407 #undef ASSEMBLE_IEEE754_BINOP
4408 #undef ASSEMBLE_IEEE754_UNOP
4409 #undef ASSEMBLE_ATOMIC_BINOP
4410 #undef ASSEMBLE_ATOMIC64_BINOP
4411 #undef ASSEMBLE_SIMD_INSTR
4412 #undef ASSEMBLE_SIMD_IMM_INSTR
4413 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4414 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4415 #undef ASSEMBLE_SIMD_ALL_TRUE
4416 #undef ASSEMBLE_SIMD_SHIFT
4417 #undef ASSEMBLE_SEQ_CST_STORE
4418
4419 namespace {
4420
4421 Condition FlagsConditionToCondition(FlagsCondition condition) {
4422 switch (condition) {
4423 case kUnorderedEqual:
4424 case kEqual:
4425 return equal;
4426 case kUnorderedNotEqual:
4427 case kNotEqual:
4428 return not_equal;
4429 case kSignedLessThan:
4430 return less;
4431 case kSignedGreaterThanOrEqual:
4432 return greater_equal;
4433 case kSignedLessThanOrEqual:
4434 return less_equal;
4435 case kSignedGreaterThan:
4436 return greater;
4437 case kUnsignedLessThan:
4438 return below;
4439 case kUnsignedGreaterThanOrEqual:
4440 return above_equal;
4441 case kUnsignedLessThanOrEqual:
4442 return below_equal;
4443 case kUnsignedGreaterThan:
4444 return above;
4445 case kOverflow:
4446 return overflow;
4447 case kNotOverflow:
4448 return no_overflow;
4449 default:
4450 break;
4451 }
4452 UNREACHABLE();
4453 }
4454
4455 } // namespace
4456
4457 // Assembles branches after this instruction.
4458 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4459 Label::Distance flabel_distance =
4460 branch->fallthru ? Label::kNear : Label::kFar;
4461 Label* tlabel = branch->true_label;
4462 Label* flabel = branch->false_label;
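  // Floating-point comparisons report "unordered" in the parity flag; route
  // NaN operands to the appropriate target before testing the main condition.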
4463 if (branch->condition == kUnorderedEqual) {
4464 __ j(parity_even, flabel, flabel_distance);
4465 } else if (branch->condition == kUnorderedNotEqual) {
4466 __ j(parity_even, tlabel);
4467 }
4468 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4469
4470 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4471 }
4472
4473 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4474 BranchInfo* branch) {
4475 Label::Distance flabel_distance =
4476 branch->fallthru ? Label::kNear : Label::kFar;
4477 Label* tlabel = branch->true_label;
4478 Label* flabel = branch->false_label;
4479 Label nodeopt;
4480 if (branch->condition == kUnorderedEqual) {
4481 __ j(parity_even, flabel, flabel_distance);
4482 } else if (branch->condition == kUnorderedNotEqual) {
4483 __ j(parity_even, tlabel);
4484 }
4485 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4486
4487 if (FLAG_deopt_every_n_times > 0) {
4488 ExternalReference counter =
4489 ExternalReference::stress_deopt_count(isolate());
4490
4491 __ pushfq();
4492 __ pushq(rax);
4493 __ load_rax(counter);
4494 __ decl(rax);
4495 __ j(not_zero, &nodeopt, Label::kNear);
4496
4497 __ Move(rax, FLAG_deopt_every_n_times);
4498 __ store_rax(counter);
4499 __ popq(rax);
4500 __ popfq();
4501 __ jmp(tlabel);
4502
4503 __ bind(&nodeopt);
4504 __ store_rax(counter);
4505 __ popq(rax);
4506 __ popfq();
4507 }
4508
4509 if (!branch->fallthru) {
4510 __ jmp(flabel, flabel_distance);
4511 }
4512 }
4513
4514 void CodeGenerator::AssembleArchJumpRegardlessOfAssemblyOrder(
4515 RpoNumber target) {
4516 __ jmp(GetLabel(target));
4517 }
4518
4519 #if V8_ENABLE_WEBASSEMBLY
4520 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4521 FlagsCondition condition) {
4522 auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4523 Label* tlabel = ool->entry();
4524 Label end;
4525 if (condition == kUnorderedEqual) {
4526 __ j(parity_even, &end, Label::kNear);
4527 } else if (condition == kUnorderedNotEqual) {
4528 __ j(parity_even, tlabel);
4529 }
4530 __ j(FlagsConditionToCondition(condition), tlabel);
4531 __ bind(&end);
4532 }
4533 #endif // V8_ENABLE_WEBASSEMBLY
4534
4535 // Assembles boolean materializations after this instruction.
4536 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4537 FlagsCondition condition) {
4538 X64OperandConverter i(this, instr);
4539 Label done;
4540
4541 // Materialize a full 64-bit 1 or 0 value. The result register is always the
4542 // last output of the instruction.
4543 Label check;
4544 DCHECK_NE(0u, instr->OutputCount());
4545 Register reg = i.OutputRegister(instr->OutputCount() - 1);
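  // For unordered comparisons, a NaN operand (parity flag set) forces the
  // result to 0 (kUnorderedEqual) or 1 (kUnorderedNotEqual) without
  // consulting the main condition.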
4546 if (condition == kUnorderedEqual) {
4547 __ j(parity_odd, &check, Label::kNear);
4548 __ Move(reg, 0);
4549 __ jmp(&done, Label::kNear);
4550 } else if (condition == kUnorderedNotEqual) {
4551 __ j(parity_odd, &check, Label::kNear);
4552 __ Move(reg, 1);
4553 __ jmp(&done, Label::kNear);
4554 }
4555 __ bind(&check);
4556 __ setcc(FlagsConditionToCondition(condition), reg);
4557 if (!ShouldClearOutputRegisterBeforeInstruction(this, instr)) {
4558 __ movzxbl(reg, reg);
4559 }
4560 __ bind(&done);
4561 }
4562
4563 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4564 X64OperandConverter i(this, instr);
4565 Register input = i.InputRegister(0);
4566 std::vector<std::pair<int32_t, Label*>> cases;
4567 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4568 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4569 }
4570 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4571 cases.data() + cases.size());
4572 }
4573
4574 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4575 X64OperandConverter i(this, instr);
4576 Register input = i.InputRegister(0);
4577 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4578 Label** cases = zone()->NewArray<Label*>(case_count);
4579 for (int32_t index = 0; index < case_count; ++index) {
4580 cases[index] = GetLabel(i.InputRpo(index + 2));
4581 }
4582 Label* const table = AddJumpTable(cases, case_count);
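  // Bounds-check the input against the case count, falling back to the
  // default block, then do an indirect jump through the jump table (one
  // 8-byte entry per case).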
4583 __ cmpl(input, Immediate(case_count));
4584 __ j(above_equal, GetLabel(i.InputRpo(1)));
4585 __ leaq(kScratchRegister, Operand(table));
4586 __ jmp(Operand(kScratchRegister, input, times_8, 0));
4587 }
4588
4589 void CodeGenerator::AssembleArchSelect(Instruction* instr,
4590 FlagsCondition condition) {
4591 X64OperandConverter i(this, instr);
4592 MachineRepresentation rep =
4593 LocationOperand::cast(instr->OutputAt(0))->representation();
4594 Condition cc = FlagsConditionToCondition(condition);
4595 DCHECK_EQ(i.OutputRegister(), i.InputRegister(instr->InputCount() - 2));
4596 size_t last_input = instr->InputCount() - 1;
4597 // kUnorderedNotEqual can be implemented more efficiently than
4598 // kUnorderedEqual. As the OR of two flags, it can be done with just two
4599 // cmovs. If the condition was originally a kUnorderedEqual, expect the
4600 // instruction selector to have inverted it and swapped the input.
4601 DCHECK_NE(condition, kUnorderedEqual);
4602 if (rep == MachineRepresentation::kWord32) {
4603 if (HasRegisterInput(instr, last_input)) {
4604 __ cmovl(cc, i.OutputRegister(), i.InputRegister(last_input));
4605 if (condition == kUnorderedNotEqual) {
4606 __ cmovl(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4607 }
4608 } else {
4609 __ cmovl(cc, i.OutputRegister(), i.InputOperand(last_input));
4610 if (condition == kUnorderedNotEqual) {
4611 __ cmovl(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4612 }
4613 }
4614 } else {
4615 DCHECK_EQ(rep, MachineRepresentation::kWord64);
4616 if (HasRegisterInput(instr, last_input)) {
4617 __ cmovq(cc, i.OutputRegister(), i.InputRegister(last_input));
4618 if (condition == kUnorderedNotEqual) {
4619 __ cmovq(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4620 }
4621 } else {
4622 __ cmovq(cc, i.OutputRegister(), i.InputOperand(last_input));
4623 if (condition == kUnorderedNotEqual) {
4624 __ cmovq(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4625 }
4626 }
4627 }
4628 }
4629
4630 namespace {
4631
4632 static const int kQuadWordSize = 16;
4633
4634 } // namespace
4635
4636 void CodeGenerator::FinishFrame(Frame* frame) {
4637 CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4638
4639 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4640 if (!saves_fp.is_empty()) { // Save callee-saved XMM registers.
4641 frame->AlignSavedCalleeRegisterSlots();
4642 const uint32_t saves_fp_count = saves_fp.Count();
4643 frame->AllocateSavedCalleeRegisterSlots(
4644 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4645 }
4646 const RegList saves = call_descriptor->CalleeSavedRegisters();
4647 if (!saves.is_empty()) { // Save callee-saved registers.
4648 frame->AllocateSavedCalleeRegisterSlots(saves.Count());
4649 }
4650 }
4651
4652 void CodeGenerator::AssembleConstructFrame() {
4653 auto call_descriptor = linkage()->GetIncomingDescriptor();
4654 if (frame_access_state()->has_frame()) {
4655 int pc_base = __ pc_offset();
4656
4657 if (call_descriptor->IsCFunctionCall()) {
4658 __ pushq(rbp);
4659 __ movq(rbp, rsp);
4660 #if V8_ENABLE_WEBASSEMBLY
4661 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4662 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4663 // Reserve stack space for saving the c_entry_fp later.
4664 __ AllocateStackSpace(kSystemPointerSize);
4665 }
4666 #endif // V8_ENABLE_WEBASSEMBLY
4667 } else if (call_descriptor->IsJSFunctionCall()) {
4668 __ Prologue();
4669 } else {
4670 __ StubPrologue(info()->GetOutputStackFrameType());
4671 #if V8_ENABLE_WEBASSEMBLY
4672 if (call_descriptor->IsWasmFunctionCall() ||
4673 call_descriptor->IsWasmImportWrapper() ||
4674 call_descriptor->IsWasmCapiFunction()) {
4675 // We do not use this stack value in import wrappers and capi functions.
4676 // We push it anyway to satisfy legacy assumptions about these frames'
4677 // size and order.
4678 // TODO(manoskouk): Consider fixing this.
4679 __ pushq(kWasmInstanceRegister);
4680 }
4681 if (call_descriptor->IsWasmCapiFunction()) {
4682 // Reserve space for saving the PC later.
4683 __ AllocateStackSpace(kSystemPointerSize);
4684 }
4685 #endif // V8_ENABLE_WEBASSEMBLY
4686 }
4687
4688 unwinding_info_writer_.MarkFrameConstructed(pc_base);
4689 }
4690 int required_slots =
4691 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4692
4693 if (info()->is_osr()) {
4694 // TurboFan OSR-compiled functions cannot be entered directly.
4695 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4696
4697 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4698 // frame is still on the stack. Optimized code uses OSR values directly from
4699 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4700 // remaining stack slots.
4701 __ RecordComment("-- OSR entrypoint --");
4702 osr_pc_offset_ = __ pc_offset();
4703 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4704 }
4705
4706 const RegList saves = call_descriptor->CalleeSavedRegisters();
4707 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4708
4709 if (required_slots > 0) {
4710 DCHECK(frame_access_state()->has_frame());
4711 #if V8_ENABLE_WEBASSEMBLY
4712 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
4713 // For WebAssembly functions with big frames we have to do the stack
4714 // overflow check before we construct the frame. Otherwise we may not
4715 // have enough space on the stack to call the runtime for the stack
4716 // overflow.
4717 Label done;
4718
4719 // If the frame is bigger than the stack, we throw the stack overflow
4720 // exception unconditionally. Thereby we can avoid the integer overflow
4721 // check in the condition code.
4722 if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
4723 __ movq(kScratchRegister,
4724 FieldOperand(kWasmInstanceRegister,
4725 WasmInstanceObject::kRealStackLimitAddressOffset));
4726 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4727 __ addq(kScratchRegister,
4728 Immediate(required_slots * kSystemPointerSize));
4729 __ cmpq(rsp, kScratchRegister);
4730 __ j(above_equal, &done, Label::kNear);
4731 }
4732
4733 __ near_call(wasm::WasmCode::kWasmStackOverflow,
4734 RelocInfo::WASM_STUB_CALL);
4735 // The call does not return, hence we can ignore any references and just
4736 // define an empty safepoint.
4737 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4738 RecordSafepoint(reference_map);
4739 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4740 __ bind(&done);
4741 }
4742 #endif // V8_ENABLE_WEBASSEMBLY
4743
4744 // Skip callee-saved and return slots, which are created below.
4745 required_slots -= saves.Count();
4746 required_slots -= saves_fp.Count() * (kQuadWordSize / kSystemPointerSize);
4747 required_slots -= frame()->GetReturnSlotCount();
4748 if (required_slots > 0) {
4749 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4750 }
4751 }
4752
4753 if (!saves_fp.is_empty()) { // Save callee-saved XMM registers.
4754 const uint32_t saves_fp_count = saves_fp.Count();
4755 const int stack_size = saves_fp_count * kQuadWordSize;
4756 // Adjust the stack pointer.
4757 __ AllocateStackSpace(stack_size);
4758 // Store the registers on the stack.
4759 int slot_idx = 0;
4760 for (XMMRegister reg : saves_fp) {
4761 __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx), reg);
4762 slot_idx++;
4763 }
4764 }
4765
4766 if (!saves.is_empty()) { // Save callee-saved registers.
4767 for (Register reg : base::Reversed(saves)) {
4768 __ pushq(reg);
4769 }
4770 }
4771
4772 // Allocate return slots (located after callee-saved).
4773 if (frame()->GetReturnSlotCount() > 0) {
4774 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4775 }
4776 }
4777
4778 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4779 auto call_descriptor = linkage()->GetIncomingDescriptor();
4780
4781 // Restore registers.
4782 const RegList saves = call_descriptor->CalleeSavedRegisters();
4783 if (!saves.is_empty()) {
4784 const int returns = frame()->GetReturnSlotCount();
4785 if (returns != 0) {
4786 __ addq(rsp, Immediate(returns * kSystemPointerSize));
4787 }
4788 for (Register reg : saves) {
4789 __ popq(reg);
4790 }
4791 }
4792 const DoubleRegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4793 if (!saves_fp.is_empty()) {
4794 const uint32_t saves_fp_count = saves_fp.Count();
4795 const int stack_size = saves_fp_count * kQuadWordSize;
4796 // Load the registers from the stack.
4797 int slot_idx = 0;
4798 for (XMMRegister reg : saves_fp) {
4799 __ Movdqu(reg, Operand(rsp, kQuadWordSize * slot_idx));
4800 slot_idx++;
4801 }
4802 // Adjust the stack pointer.
4803 __ addq(rsp, Immediate(stack_size));
4804 }
4805
4806 unwinding_info_writer_.MarkBlockWillExit();
4807
4808 X64OperandConverter g(this, nullptr);
4809 int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
4810
4811   // {additional_pop_count} is only greater than zero if {parameter_slots == 0}.
4812 // Check RawMachineAssembler::PopAndReturn.
4813 if (parameter_slots != 0) {
4814 if (additional_pop_count->IsImmediate()) {
4815 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4816 } else if (FLAG_debug_code) {
4817 __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4818 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4819 }
4820 }
4821
4822 Register argc_reg = rcx;
4823 // Functions with JS linkage have at least one parameter (the receiver).
4824 // If {parameter_slots} == 0, it means it is a builtin with
4825 // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
4826 // itself.
4827 const bool drop_jsargs = parameter_slots != 0 &&
4828 frame_access_state()->has_frame() &&
4829 call_descriptor->IsJSFunctionCall();
4830 if (call_descriptor->IsCFunctionCall()) {
4831 AssembleDeconstructFrame();
4832 } else if (frame_access_state()->has_frame()) {
4833 if (additional_pop_count->IsImmediate() &&
4834 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4835 // Canonicalize JSFunction return sites for now.
4836 if (return_label_.is_bound()) {
4837 __ jmp(&return_label_);
4838 return;
4839 } else {
4840 __ bind(&return_label_);
4841 }
4842 }
4843 if (drop_jsargs) {
4844 // Get the actual argument count.
4845 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
4846 __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4847 }
4848 AssembleDeconstructFrame();
4849 }
4850
4851 if (drop_jsargs) {
4852 // We must pop all arguments from the stack (including the receiver).
4853 // The number of arguments without the receiver is
4854 // max(argc_reg, parameter_slots-1), and the receiver is added in
4855 // DropArguments().
4856 Label mismatch_return;
4857 Register scratch_reg = r10;
4858 DCHECK_NE(argc_reg, scratch_reg);
4859 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4860 DCHECK(!call_descriptor->CalleeSavedRegisters().has(argc_reg));
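    // If the dynamic argument count does not exceed the declared parameter
    // count, pop the statically known amount; otherwise drop the larger,
    // dynamic count (which includes the receiver).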
4861 __ cmpq(argc_reg, Immediate(parameter_slots));
4862 __ j(greater, &mismatch_return, Label::kNear);
4863 __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
4864 __ bind(&mismatch_return);
4865 __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
4866 TurboAssembler::kCountIncludesReceiver);
4867 // We use a return instead of a jump for better return address prediction.
4868 __ Ret();
4869 } else if (additional_pop_count->IsImmediate()) {
4870 Register scratch_reg = r10;
4871 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4872 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4873 size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
4874 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4875 __ Ret(static_cast<int>(pop_size), scratch_reg);
4876 } else {
4877 Register pop_reg = g.ToRegister(additional_pop_count);
4878 Register scratch_reg = pop_reg == r10 ? rcx : r10;
4879 DCHECK(!call_descriptor->CalleeSavedRegisters().has(scratch_reg));
4880 DCHECK(!call_descriptor->CalleeSavedRegisters().has(pop_reg));
4881 int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
4882 __ PopReturnAddressTo(scratch_reg);
4883 __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4884 static_cast<int>(pop_size)));
4885 __ PushReturnAddressFrom(scratch_reg);
4886 __ Ret();
4887 }
4888 }
4889
4890 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4891
4892 void CodeGenerator::PrepareForDeoptimizationExits(
4893 ZoneDeque<DeoptimizationExit*>* exits) {}
4894
4895 void CodeGenerator::IncrementStackAccessCounter(
4896 InstructionOperand* source, InstructionOperand* destination) {
4897 DCHECK(FLAG_trace_turbo_stack_accesses);
4898 if (!info()->IsOptimizing()) {
4899 #if V8_ENABLE_WEBASSEMBLY
4900 if (!info()->IsWasm()) return;
4901 #else
4902 return;
4903 #endif // V8_ENABLE_WEBASSEMBLY
4904 }
4905 DCHECK_NOT_NULL(debug_name_);
4906 auto IncrementCounter = [&](ExternalReference counter) {
4907 __ incl(__ ExternalReferenceAsOperand(counter));
4908 };
4909 if (source->IsAnyStackSlot()) {
4910 IncrementCounter(
4911 ExternalReference::address_of_load_from_stack_count(debug_name_));
4912 }
4913 if (destination->IsAnyStackSlot()) {
4914 IncrementCounter(
4915 ExternalReference::address_of_store_to_stack_count(debug_name_));
4916 }
4917 }
4918
4919 namespace {
4920
4921 bool Is32BitOperand(InstructionOperand* operand) {
4922 DCHECK(operand->IsStackSlot() || operand->IsRegister());
4923 MachineRepresentation mr = LocationOperand::cast(operand)->representation();
4924 return mr == MachineRepresentation::kWord32 ||
4925 mr == MachineRepresentation::kCompressed ||
4926 mr == MachineRepresentation::kCompressedPointer;
4927 }
4928
4929 } // namespace
4930
4931 void CodeGenerator::AssembleMove(InstructionOperand* source,
4932 InstructionOperand* destination) {
4933 X64OperandConverter g(this, nullptr);
4934 // Helper function to write the given constant to the dst register.
4935 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4936 switch (src.type()) {
4937 case Constant::kInt32: {
4938 if (RelocInfo::IsWasmReference(src.rmode())) {
4939 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4940 } else {
4941 int32_t value = src.ToInt32();
4942 if (value == 0) {
            __ xorl(dst, dst);
          } else {
            __ movl(dst, Immediate(value));
          }
        }
        break;
      }
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(src.rmode())) {
          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
        } else {
          __ Move(dst, src.ToInt64());
        }
        break;
      case Constant::kFloat32:
        __ MoveNumber(dst, src.ToFloat32());
        break;
      case Constant::kFloat64:
        __ MoveNumber(dst, src.ToFloat64().value());
        break;
      case Constant::kExternalReference:
        __ Move(dst, src.ToExternalReference());
        break;
      case Constant::kHeapObject: {
        Handle<HeapObject> src_object = src.ToHeapObject();
        RootIndex index;
        if (IsMaterializableFromRoot(src_object, &index)) {
          __ LoadRoot(dst, index);
        } else {
          __ Move(dst, src_object);
        }
        break;
      }
      case Constant::kCompressedHeapObject: {
        Handle<HeapObject> src_object = src.ToHeapObject();
        RootIndex index;
        if (IsMaterializableFromRoot(src_object, &index)) {
          __ LoadRoot(dst, index);
        } else {
          __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
        }
        break;
      }
      case Constant::kDelayedStringConstant: {
        const StringConstantBase* src_constant = src.ToDelayedStringConstant();
        __ MoveStringConstant(dst, src_constant);
        break;
      }
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    }
  };
  // Helper function to write the given constant to the stack.
  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    if (!RelocInfo::IsWasmReference(src.rmode())) {
      switch (src.type()) {
        case Constant::kInt32:
          __ Move(dst, src.ToInt32());
          return;
        case Constant::kInt64:
          __ Move(dst, src.ToInt64());
          return;
        default:
          break;
      }
    }
    MoveConstantToRegister(kScratchRegister, src);
    __ movq(dst, kScratchRegister);
  };

  if (FLAG_trace_turbo_stack_accesses) {
    IncrementStackAccessCounter(source, destination);
  }

  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ movq(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ movq(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        // TODO(13581): Fix this for other code kinds (see
        // https://crbug.com/1356461).
        if (code_kind() == CodeKind::WASM_FUNCTION && Is32BitOperand(source) &&
            Is32BitOperand(destination)) {
          // When we need only 32 bits, move only 32 bits. Benefits:
          // - Save a byte here and there (depending on the destination
          //   register; "movl eax, ..." is smaller than "movq rax, ...").
          // - Safeguard against accidental decompression of compressed slots.
          // We must check both {source} and {destination} to be 32-bit values,
          // because treating 32-bit sources as 64-bit values can be perfectly
          // fine as a result of virtual register renaming (to avoid redundant
          // explicit zero-extensions that also happen implicitly).
          __ movl(g.ToRegister(destination), src);
        } else {
          __ movq(g.ToRegister(destination), src);
        }
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // Spill on demand to use a temporary register for memory-to-memory
        // moves.
        __ movq(kScratchRegister, src);
        __ movq(dst, kScratchRegister);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK(source->IsSimd128StackSlot());
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        MoveConstantToSlot(dst, src);
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  if (FLAG_trace_turbo_stack_accesses) {
    IncrementStackAccessCounter(source, destination);
    IncrementStackAccessCounter(destination, source);
  }

  X64OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
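      // Exchange the two registers through the corresponding scratch register;
      // no memory traffic is involved.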
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
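        // Swap a register with a stack slot: stash the register in the scratch
        // register, load the slot into the register, then store the stashed
        // value into the slot.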
        Register src = g.ToRegister(source);
        Operand dst = g.ToOperand(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
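      // Swap two stack slots: hold one value in the scratch register (the
      // scratch XMM register for SIMD) and copy the other through the stack
      // via push/pop.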
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        Register tmp = kScratchRegister;
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ pushq(g.ToOperand(source, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(g.ToOperand(destination, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
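  // Each jump table entry is a quadword holding the address of its target
  // label.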
  for (size_t index = 0; index < target_count; ++index) {
    __ dq(targets[index]);
  }
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8