// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
#define V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/x64/register-x64.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/simd-shuffle.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}

constexpr Register kScratchRegister2 = r11;
static_assert(kScratchRegister != kScratchRegister2, "collision");
static_assert((kLiftoffAssemblerGpCacheRegs &
               RegList{kScratchRegister, kScratchRegister2})
                  .is_empty(),
              "scratch registers must not be used as cache registers");

constexpr DoubleRegister kScratchDoubleReg2 = xmm14;
static_assert(kScratchDoubleReg != kScratchDoubleReg2, "collision");
static_assert((kLiftoffAssemblerFpCacheRegs &
               DoubleRegList{kScratchDoubleReg, kScratchDoubleReg2})
                  .is_empty(),
              "scratch registers must not be used as cache registers");

// rbp-8 holds the stack marker, rbp-16 is the instance parameter.
constexpr int kInstanceOffset = 16;
constexpr int kFeedbackVectorOffset = 24;  // rbp-24 is the feedback vector.
constexpr int kTierupBudgetOffset = 32;    // rbp-32 is the tier-up budget.

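// Sketch of the fixed frame slots implied by the offsets above (illustrative
// only; the OSR target slot at {kOSRTargetOffset} is defined elsewhere and
// the spill area follows below these slots):
//
//   rbp -  8 : frame marker
//   rbp - 16 : instance          (kInstanceOffset)
//   rbp - 24 : feedback vector   (kFeedbackVectorOffset)
//   rbp - 32 : tier-up budget    (kTierupBudgetOffset)
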
inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline Operand GetOSRTargetSlot() { return GetStackSlot(kOSRTargetOffset); }

inline Operand GetMemOp(LiftoffAssembler* assm, Register addr, Register offset,
                        uintptr_t offset_imm) {
  if (is_uint31(offset_imm)) {
    int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
    return offset == no_reg ? Operand(addr, offset_imm32)
                            : Operand(addr, offset, times_1, offset_imm32);
  }
  // Offset immediate does not fit in 31 bits.
  Register scratch = kScratchRegister;
  assm->TurboAssembler::Move(scratch, offset_imm);
  if (offset != no_reg) assm->addq(scratch, offset);
  return Operand(addr, scratch, times_1, 0);
}

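// Illustrative shapes of the operands produced by {GetMemOp} (sketch):
//   - offset == no_reg, offset_imm fits in 31 bits:
//       Operand(addr, offset_imm)                   -> [addr + offset_imm]
//   - offset != no_reg, offset_imm fits in 31 bits:
//       Operand(addr, offset, times_1, offset_imm)  -> [addr + offset + offset_imm]
//   - offset_imm does not fit in 31 bits: the immediate (plus {offset}, if
//     any) is first materialized in kScratchRegister, yielding
//       Operand(addr, kScratchRegister, times_1, 0) -> [addr + scratch]
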
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Operand src,
                 ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst.gp(), src);
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->movq(dst.gp(), src);
      break;
    case kF32:
      assm->Movss(dst.fp(), src);
      break;
    case kF64:
      assm->Movsd(dst.fp(), src);
      break;
    case kS128:
      assm->Movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}

inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
                  ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst, src.gp());
      break;
    case kI64:
      assm->movq(dst, src.gp());
      break;
    case kOptRef:
    case kRef:
    case kRtt:
      assm->StoreTaggedField(dst, src.gp());
      break;
    case kF32:
      assm->Movss(dst, src.fp());
      break;
    case kF64:
      assm->Movsd(dst, src.fp());
      break;
    case kS128:
      assm->Movdqu(dst, src.fp());
      break;
    default:
      UNREACHABLE();
  }
}

inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kI64:
    case kRef:
    case kOptRef:
      assm->AllocateStackSpace(padding);
      assm->pushq(reg.gp());
      break;
    case kF32:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movss(Operand(rsp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movsd(Operand(rsp, 0), reg.fp());
      break;
    case kS128:
      assm->AllocateStackSpace(kSystemPointerSize * 2 + padding);
      assm->Movdqu(Operand(rsp, 0), reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

constexpr int kSubSpSize = 7;  // 7 bytes for "subq rsp, <imm32>"

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub_sp_32(0);
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  pushq(Operand(rbp, 8));
  pushq(Operand(rbp, 0));

  // Shift the whole frame upwards.
  const int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count - 1; i >= 0; --i) {
    movq(kScratchRegister, Operand(rsp, i * 8));
    movq(Operand(rbp, (i - stack_param_delta) * 8), kScratchRegister);
  }

  // Set the new stack and frame pointer.
  leaq(rsp, Operand(rbp, -stack_param_delta * 8));
  popq(rbp);
}

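// Worked sketch for {PrepareTailCall}: after the two pushes, the lowest
// {slot_count} slots (callee stack parameters plus the just-pushed return
// address and frame pointer) live at [rsp + i * 8]; the copy loop moves each
// of them to [rbp + (i - stack_param_delta) * 8], shifting the whole frame by
// {stack_param_delta} slots before rsp and rbp are rebased.
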
void LiftoffAssembler::AlignFrameSize() {
  max_used_spill_offset_ = RoundUp(max_used_spill_offset_, kSystemPointerSize);
}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We can't run out of space when patching, just pass anything big enough to
  // not cause the assembler to try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available
  // stack space if we first allocate the frame and then do the stack check
  // (we will need some remaining stack space for throwing the exception).
  // That's why we check the available stack space before we allocate the
  // frame. To do this we replace the {__ sub(sp, framesize)} with a jump to
  // OOL code that does this "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already
  // finished when {PatchPrepareStackFrame} is called. The function prologue
  // then jumps to the current {pc_offset()} to execute the OOL code for
  // allocating the large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    movq(kScratchRegister,
         FieldOperand(kWasmInstanceRegister,
                      WasmInstanceObject::kRealStackLimitAddressOffset));
    movq(kScratchRegister, Operand(kScratchRegister, 0));
    addq(kScratchRegister, Immediate(frame_size));
    cmpq(rsp, kScratchRegister);
    j(above_equal, &continuation, Label::kNear);
  }

  near_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}

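// Resulting code shape for the large-frame path above (illustrative sketch):
//
//   prologue (patched over the reserved "subq rsp, <imm32>" bytes):
//     jmp <ool>        ; padded with nops up to liftoff::kSubSpSize
//   ... function body ...
//   ool:
//     ; compare rsp against the real stack limit plus frame_size and call
//     ; the WasmStackOverflow stub if the frame does not fit
//     AllocateStackSpace(frame_size)
//     jmp <first instruction after the patched prologue>
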
void LiftoffAssembler::FinishCode() {}

void LiftoffAssembler::AbortCompilation() {}

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return kOSRTargetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  return value_kind_full_size(kind);
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0 && RelocInfo::IsNoInfo(rmode)) {
        xorl(reg.gp(), reg.gp());
      } else {
        movl(reg.gp(), Immediate(value.to_i32(), rmode));
      }
      break;
    case kI64:
      if (RelocInfo::IsNoInfo(rmode)) {
        TurboAssembler::Move(reg.gp(), value.to_i64());
      } else {
        movq(reg.gp(), Immediate64(value.to_i64(), rmode));
      }
      break;
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzxbl(dst, src);
      break;
    case 4:
      movl(dst, src);
      break;
    case 8:
      movq(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, Operand(instance, offset));
}

void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldOperand(instance, offset), tag,
                           isolate_root,
                           IsolateRootLocation::kInScratchRegister);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  movq(liftoff::GetInstanceOperand(), instance);
}

void LiftoffAssembler::ResetOSRTarget() {
  movq(liftoff::GetOSRTargetSlot(), Immediate(0));
}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  if (FLAG_debug_code && offset_reg != no_reg) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  LoadTaggedPointerField(dst, src_op);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, no_reg,
                                     static_cast<uint32_t>(offset_imm));
  movq(dst, src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  StoreTaggedField(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, scratch,
                MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
                &write_barrier, Label::kNear);
  jmp(&exit, Label::kNear);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit, Label::kNear);
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(src.gp(), src.gp());
  }
  CheckPageFlag(src.gp(), scratch,
                MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
                Label::kNear);
  leaq(scratch, dst_op);

  CallRecordWriteStubSaveRegisters(
      dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

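// The write barrier emitted above follows the usual generational-barrier
// shape (sketch): skip everything if the page holding {dst_addr} has no
// "pointers from here are interesting" flag, skip if {src} is a Smi or points
// into a page without the "pointers to here are interesting" flag, and only
// then call the record-write stub with the slot address in {scratch}.
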
void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uintptr_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  if (offset_reg != no_reg && !i64_offset) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      movzxbl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsxbl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8S:
      movsxbq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      movzxwl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsxwl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16S:
      movsxwq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      movl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32S:
      movsxlq(dst.gp(), src_op);
      break;
    case LoadType::kI64Load:
      movq(dst.gp(), src_op);
      break;
    case LoadType::kF32Load:
      Movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      Movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      Movdqu(dst.fp(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList /* pinned */,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      movb(dst_op, src.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      movw(dst_op, src.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      movl(dst_op, src.gp());
      break;
    case StoreType::kI64Store:
      movq(dst_op, src.gp());
      break;
    case StoreType::kF32Store:
      Movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      Movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  Register src_reg = src.gp();
  if (cache_state()->is_used(src)) {
    movq(kScratchRegister, src_reg);
    src_reg = kScratchRegister;
  }
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(src_reg, dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(src_reg, dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(src_reg, dst_op);
      break;
    case StoreType::kI64Store:
      xchgq(src_reg, dst_op);
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  LiftoffRegList dont_overwrite =
      cache_state()->used_registers | LiftoffRegList{dst_addr, offset_reg};
  DCHECK(!dont_overwrite.has(result));
  if (dont_overwrite.has(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      negb(value.gp());
      lock();
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      negw(value.gp());
      lock();
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      negl(value.gp());
      lock();
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      negq(value.gp());
      lock();
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicBinop(LiftoffAssembler* lasm,
                        void (Assembler::*opl)(Register, Register),
                        void (Assembler::*opq)(Register, Register),
                        Register dst_addr, Register offset_reg,
                        uintptr_t offset_imm, LiftoffRegister value,
                        LiftoffRegister result, StoreType type) {
  DCHECK(!__ cache_state()->is_used(result));
  Register value_reg = value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned = LiftoffRegList{dst_addr, offset_reg, value_reg};
  __ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  Operand dst_op = liftoff::GetMemOp(lasm, dst_addr, offset_reg, offset_imm);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      Label binop;
      __ xorq(rax, rax);
      __ movb(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgb(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      Label binop;
      __ xorq(rax, rax);
      __ movw(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgw(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      Label binop;
      __ movl(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgl(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI64Store: {
      Label binop;
      __ movq(rax, dst_op);
      __ bind(&binop);
      __ movq(kScratchRegister, rax);
      (lasm->*opq)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgq(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    default:
      UNREACHABLE();
  }

  if (result.gp() != rax) {
    __ movq(result.gp(), rax);
  }
}
#undef __
}  // namespace liftoff

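// Sketch of the sequence {AtomicBinop} emits for the 32-bit case (the other
// widths differ only in operand size and zero-extension of the loaded value):
//
//     movl rax, [mem]
//   binop:
//     movl kScratchRegister, rax
//     <op>l kScratchRegister, value
//     lock cmpxchgl [mem], kScratchRegister
//     jne binop                 ; cmpxchg reloads rax with [mem] on failure
//     movq result, rax          ; only if result != rax; rax holds the old value
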
void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::andl, &Assembler::andq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::orl, &Assembler::orq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::xorl, &Assembler::xorq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(value.gp(), dst_op);
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(value.gp(), dst_op);
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xchgq(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  Register value_reg = new_value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList{dst_addr, offset_reg, expected, value_reg};
  ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  if (expected.gp() != rax) {
    movq(rax, expected.gp());
  }

  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);

  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      cmpxchgb(dst_op, value_reg);
      movzxbq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      cmpxchgw(dst_op, value_reg);
      movzxwq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store: {
      cmpxchgl(dst_op, value_reg);
      if (result.gp() != rax) {
        movl(result.gp(), rax);
      }
      break;
    }
    case StoreType::kI64Store32: {
      cmpxchgl(dst_op, value_reg);
      // Zero extension.
      movl(result.gp(), rax);
      break;
    }
    case StoreType::kI64Store: {
      cmpxchgq(dst_op, value_reg);
      if (result.gp() != rax) {
        movq(result.gp(), rax);
      }
      break;
    }
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicFence() { mfence(); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  Operand src(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Load(this, dst, src, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Store(this, dst, src, kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  Operand src(rsp, offset);
  liftoff::Load(this, reg, src, kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_NE(dst_offset, src_offset);
  Operand dst = liftoff::GetStackSlot(dst_offset);
  Operand src = liftoff::GetStackSlot(src_offset);
  switch (SlotSizeForType(kind)) {
    case 4:
      movl(kScratchRegister, src);
      movl(dst, kScratchRegister);
      break;
    case 8:
      movq(kScratchRegister, src);
      movq(dst, kScratchRegister);
      break;
    case 16:
      Movdqu(kScratchDoubleReg, src);
      Movdqu(dst, kScratchDoubleReg);
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kI32) {
    movl(dst, src);
  } else {
    DCHECK(kI64 == kind || is_reference(kind));
    movq(dst, src);
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    Movss(dst, src);
  } else if (kind == kF64) {
    Movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movapd(dst, src);
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
      movl(dst, reg.gp());
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
      movq(dst, reg.gp());
      break;
    case kF32:
      Movss(dst, reg.fp());
      break;
    case kF64:
      Movsd(dst, reg.fp());
      break;
    case kS128:
      Movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      movl(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      if (is_int32(value.to_i64())) {
        // Sign extend low word.
        movq(dst, Immediate(static_cast<int32_t>(value.to_i64())));
      } else if (is_uint32(value.to_i64())) {
        // Zero extend low word.
        movl(kScratchRegister, Immediate(static_cast<int32_t>(value.to_i64())));
        movq(dst, kScratchRegister);
      } else {
        movq(kScratchRegister, value.to_i64());
        movq(dst, kScratchRegister);
      }
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  RecordUsedSpillOffset(start + size);

  if (size <= 3 * kStackSlotSize) {
    // Special straight-line code for up to three slots
    // (7-10 bytes per slot: REX C7 <1-4 bytes op> <4 bytes imm>),
    // and a movl (6-9 bytes) when size % 8 != 0.
    uint32_t remainder = size;
    for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) {
      movq(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
    DCHECK(remainder == 4 || remainder == 0);
    if (remainder) {
      movl(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 4-7 for lea, 2 for xor, 5
    // for mov, 2 for repstosl, 3 for pops).
    pushq(rax);
    pushq(rcx);
    pushq(rdi);
    leaq(rdi, liftoff::GetStackSlot(start + size));
    xorl(rax, rax);
    // Convert size (bytes) to doublewords (4-bytes).
    movl(rcx, Immediate(size / 4));
    repstosl();
    popq(rdi);
    popq(rcx);
    popq(rax);
  }
}

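// The two strategies above, spelled out (sketch): for up to three slots the
// assembler emits one "movq [slot], 0" per 8-byte slot, plus a trailing
// "movl" if 4 bytes remain; for larger regions it saves rax/rcx/rdi, points
// rdi at the area, zeroes rax, loads the doubleword count into rcx and runs
// "rep stosl".
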
void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, rhs, times_1, 0));
  } else {
    addl(dst, rhs);
  }
}

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, imm));
  } else {
    addl(dst, Immediate(imm));
  }
}

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
  if (dst != rhs) {
    // Default path.
    if (dst != lhs) movl(dst, lhs);
    subl(dst, rhs);
  } else if (lhs == rhs) {
    // Degenerate case.
    xorl(dst, dst);
  } else {
    // Emit {dst = lhs + -rhs} if dst == rhs.
    negl(dst);
    addl(dst, lhs);
  }
}

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    leal(dst, Operand(lhs, -imm));
  } else {
    subl(dst, Immediate(imm));
  }
}

namespace liftoff {
template <void (Assembler::*op)(Register, Register),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
                          Register rhs) {
  if (dst == rhs) {
    (assm->*op)(dst, lhs);
  } else {
    if (dst != lhs) (assm->*mov)(dst, lhs);
    (assm->*op)(dst, rhs);
  }
}

template <void (Assembler::*op)(Register, Immediate),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
                             int32_t imm) {
  if (dst != lhs) (assm->*mov)(dst, lhs);
  (assm->*op)(dst, Immediate(imm));
}

}  // namespace liftoff

void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imull, &Assembler::movl>(this, dst,
                                                                     lhs, rhs);
}

namespace liftoff {
enum class DivOrRem : uint8_t { kDiv, kRem };
template <typename type, DivOrRem div_or_rem>
void EmitIntDivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
                     Register rhs, Label* trap_div_by_zero,
                     Label* trap_div_unrepresentable) {
  constexpr bool needs_unrepresentable_check =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);

#define iop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##l(__VA_ARGS__); \
    } else {                      \
      assm->name##q(__VA_ARGS__); \
    }                             \
  } while (false)

  // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
  // these registers are unused. If {rhs} is stored in one of them, move it to
  // another temporary register.
  // Do all this before any branch, such that the code is executed
  // unconditionally, as the cache state will also be modified unconditionally.
  assm->SpillRegisters(rdx, rax);
  if (rhs == rax || rhs == rdx) {
    iop(mov, kScratchRegister, rhs);
    rhs = kScratchRegister;
  }

  // Check for division by zero.
  iop(test, rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  Label done;
  if (needs_unrepresentable_check) {
    // Check for {kMinInt / -1}. This is unrepresentable.
    Label do_div;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_div);
    // {lhs} is min int if {lhs - 1} overflows.
    iop(cmp, lhs, Immediate(1));
    assm->j(overflow, trap_div_unrepresentable);
    assm->bind(&do_div);
  } else if (special_case_minus_1) {
    // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
    // cannot be computed).
    Label do_rem;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_rem);
    // clang-format off
    // (conflicts with presubmit checks because it is confused about "xor")
    iop(xor, dst, dst);
    // clang-format on
    assm->jmp(&done);
    assm->bind(&do_rem);
  }

  // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx},
  // then do the division.
  if (lhs != rax) iop(mov, rax, lhs);
  if (std::is_same<int32_t, type>::value) {  // i32
    assm->cdq();
    assm->idivl(rhs);
  } else if (std::is_same<uint32_t, type>::value) {  // u32
    assm->xorl(rdx, rdx);
    assm->divl(rhs);
  } else if (std::is_same<int64_t, type>::value) {  // i64
    assm->cqo();
    assm->idivq(rhs);
  } else {  // u64
    assm->xorq(rdx, rdx);
    assm->divq(rhs);
  }

  // Move back the result (in {eax} or {edx}) into the {dst} register.
  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? rax : rdx;
  if (dst != kResultReg) {
    iop(mov, dst, kResultReg);
  }
  if (special_case_minus_1) assm->bind(&done);
}
}  // namespace liftoff

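// Edge cases handled by {EmitIntDivOrRem} above, as a worked example for i32
// (the i64 cases are analogous):
//   - rhs == 0                  : branch to {trap_div_by_zero}.
//   - signed div, kMinInt / -1  : the quotient 2^31 is unrepresentable, so
//                                 branch to {trap_div_unrepresentable}.
//   - signed rem, kMinInt % -1  : mathematically 0, but idivl would fault, so
//                                 the result is materialized as 0 directly.
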
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andl, &Assembler::movl>(
      this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                   lhs, rhs);
}

void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                      lhs, imm);
}

void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorl, &Assembler::movl>(
      this, dst, lhs, imm);
}

namespace liftoff {
template <ValueKind kind>
inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
                               Register src, Register amount,
                               void (Assembler::*emit_shift)(Register)) {
  // If dst is rcx, compute into the scratch register first, then move to rcx.
  if (dst == rcx) {
    assm->Move(kScratchRegister, src, kind);
    if (amount != rcx) assm->Move(rcx, amount, kind);
    (assm->*emit_shift)(kScratchRegister);
    assm->Move(rcx, kScratchRegister, kind);
    return;
  }

  // Move amount into rcx. If rcx is in use, move its content into the scratch
  // register. If src is rcx, src is now the scratch register.
  bool use_scratch = false;
  if (amount != rcx) {
    use_scratch =
        src == rcx || assm->cache_state()->is_used(LiftoffRegister(rcx));
    if (use_scratch) assm->movq(kScratchRegister, rcx);
    if (src == rcx) src = kScratchRegister;
    assm->Move(rcx, amount, kind);
  }

  // Do the actual shift.
  if (dst != src) assm->Move(dst, src, kind);
  (assm->*emit_shift)(dst);

  // Restore rcx if needed.
  if (use_scratch) assm->movq(rcx, kScratchRegister);
}
}  // namespace liftoff

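// Register shuffling performed by {EmitShiftOperation} above (sketch): x64
// variable shifts take the amount in cl, so {amount} is moved into rcx first.
// If rcx is live in the cache state (or is {src}), its value is parked in
// kScratchRegister and restored afterwards; if {dst} is rcx itself, the shift
// is computed in kScratchRegister and moved into rcx at the end.
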
void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shll_cl);
}

void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shll(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::sarl_cl);
}

void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  sarl(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shrl_cl);
}

void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shrl(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Lzcntl(dst, src);
}

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Tzcntl(dst, src);
}

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntl(dst, src);
  return true;
}

void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  if (lhs.gp() != dst.gp()) {
    leaq(dst.gp(), Operand(lhs.gp(), rhs.gp(), times_1, 0));
  } else {
    addq(dst.gp(), rhs.gp());
  }
}

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
  if (!is_int32(imm)) {
    TurboAssembler::Move(kScratchRegister, imm);
    if (lhs.gp() == dst.gp()) {
      addq(dst.gp(), kScratchRegister);
    } else {
      leaq(dst.gp(), Operand(lhs.gp(), kScratchRegister, times_1, 0));
    }
  } else if (lhs.gp() == dst.gp()) {
    addq(dst.gp(), Immediate(static_cast<int32_t>(imm)));
  } else {
    leaq(dst.gp(), Operand(lhs.gp(), static_cast<int32_t>(imm)));
  }
}

void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  if (lhs.gp() == rhs.gp()) {
    xorq(dst.gp(), dst.gp());
  } else if (dst.gp() == rhs.gp()) {
    negq(dst.gp());
    addq(dst.gp(), lhs.gp());
  } else {
    if (dst.gp() != lhs.gp()) movq(dst.gp(), lhs.gp());
    subq(dst.gp(), rhs.gp());
  }
}

void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imulq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero,
      trap_div_unrepresentable);
  return true;
}

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kDiv>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kRem>(
      this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
  return true;
}

void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_andi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
                                   LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_ori(LiftoffRegister dst, LiftoffRegister lhs,
                                    int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), rhs.gp());
}

void LiftoffAssembler::emit_i64_xori(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorq, &Assembler::movq>(
      this, dst.gp(), lhs.gp(), imm);
}

void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shlq_cl);
}

void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  shlq(dst.gp(), Immediate(amount & 63));
}

void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::sarq_cl);
}

void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
  sarq(dst.gp(), Immediate(amount & 63));
}

void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
                                    &Assembler::shrq_cl);
}

void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  if (dst != src) movq(dst.gp(), src.gp());
  shrq(dst.gp(), Immediate(amount & 63));
}

void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  Lzcntq(dst.gp(), src.gp());
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  Tzcntq(dst.gp(), src.gp());
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntq(dst.gp(), src.gp());
  return true;
}

void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
  SmiAddConstant(Operand(dst.gp(), offset), Smi::FromInt(1));
}

void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
  movl(dst, src);
}

void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vaddss(dst, lhs, rhs);
  } else if (dst == rhs) {
    addss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    addss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    subss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    subss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmulss(dst, lhs, rhs);
  } else if (dst == rhs) {
    mulss(dst, lhs);
  } else {
    if (dst != lhs) movss(dst, lhs);
    mulss(dst, rhs);
  }
}

void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vdivss(dst, lhs, rhs);
  } else if (dst == rhs) {
    movss(kScratchDoubleReg, rhs);
    movss(dst, lhs);
    divss(dst, kScratchDoubleReg);
  } else {
    if (dst != lhs) movss(dst, lhs);
    divss(dst, rhs);
  }
}

1536 namespace liftoff {
1537 enum class MinOrMax : uint8_t { kMin, kMax };
1538 template <typename type>
EmitFloatMinOrMax(LiftoffAssembler * assm,DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs,MinOrMax min_or_max)1539 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
1540 DoubleRegister lhs, DoubleRegister rhs,
1541 MinOrMax min_or_max) {
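// Wasm min/max must propagate NaN and must treat -0.0 as less than +0.0, so
// plain minss/minsd (maxss/maxsd) cannot be used directly. Branch on the
// ucomis result and handle the NaN and signed-zero cases explicitly below.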
1542 Label is_nan;
1543 Label lhs_below_rhs;
1544 Label lhs_above_rhs;
1545 Label done;
1546
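// {dop} expands to the single-precision ("...s") or the double-precision
// ("...d") form of the given instruction, selected by the {type} template
// parameter.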
1547 #define dop(name, ...) \
1548 do { \
1549 if (sizeof(type) == 4) { \
1550 assm->name##s(__VA_ARGS__); \
1551 } else { \
1552 assm->name##d(__VA_ARGS__); \
1553 } \
1554 } while (false)
1555
1556 // Check the easy cases first: nan (i.e. unordered), smaller and greater.
1557 // NaN has to be checked first, because PF=1 implies CF=1.
1558 dop(Ucomis, lhs, rhs);
1559 assm->j(parity_even, &is_nan, Label::kNear); // PF=1
1560 assm->j(below, &lhs_below_rhs, Label::kNear); // CF=1
1561 assm->j(above, &lhs_above_rhs, Label::kNear); // CF=0 && ZF=0
1562
1563 // If we get here, then either
1564 // a) {lhs == rhs},
1565 // b) {lhs == -0.0} and {rhs == 0.0}, or
1566 // c) {lhs == 0.0} and {rhs == -0.0}.
1567 // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
1568 // bit of {rhs} to differentiate b) and c).
1569 dop(Movmskp, kScratchRegister, rhs);
1570 assm->testl(kScratchRegister, Immediate(1));
1571 assm->j(zero, &lhs_below_rhs, Label::kNear);
1572 assm->jmp(&lhs_above_rhs, Label::kNear);
1573
1574 assm->bind(&is_nan);
1575 // Create a NaN output.
1576 dop(Xorp, dst, dst);
1577 dop(Divs, dst, dst);
1578 assm->jmp(&done, Label::kNear);
1579
1580 assm->bind(&lhs_below_rhs);
1581 DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
1582 if (dst != lhs_below_rhs_src) dop(Movs, dst, lhs_below_rhs_src);
1583 assm->jmp(&done, Label::kNear);
1584
1585 assm->bind(&lhs_above_rhs);
1586 DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
1587 if (dst != lhs_above_rhs_src) dop(Movs, dst, lhs_above_rhs_src);
1588
1589 assm->bind(&done);
1590 }
1591 } // namespace liftoff
1592
emit_f32_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1593 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1594 DoubleRegister rhs) {
1595 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1596 liftoff::MinOrMax::kMin);
1597 }
1598
emit_f32_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1599 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
1600 DoubleRegister rhs) {
1601 liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1602 liftoff::MinOrMax::kMax);
1603 }
1604
emit_f32_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1605 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1606 DoubleRegister rhs) {
1607 static constexpr int kF32SignBit = 1 << 31;
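// dst = (lhs & ~kF32SignBit) | (rhs & kF32SignBit): keep the magnitude bits of
// {lhs} and take only the sign bit from {rhs}, combined via GP registers.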
1608 Movd(kScratchRegister, lhs);
1609 andl(kScratchRegister, Immediate(~kF32SignBit));
1610 Movd(liftoff::kScratchRegister2, rhs);
1611 andl(liftoff::kScratchRegister2, Immediate(kF32SignBit));
1612 orl(kScratchRegister, liftoff::kScratchRegister2);
1613 Movd(dst, kScratchRegister);
1614 }
1615
emit_f32_abs(DoubleRegister dst,DoubleRegister src)1616 void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
1617 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
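// abs(x) clears the sign bit; {kSignBit - 1} is a mask of all bits except the
// sign bit.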
1618 if (dst == src) {
1619 TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
1620 Andps(dst, kScratchDoubleReg);
1621 } else {
1622 TurboAssembler::Move(dst, kSignBit - 1);
1623 Andps(dst, src);
1624 }
1625 }
1626
emit_f32_neg(DoubleRegister dst,DoubleRegister src)1627 void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
1628 static constexpr uint32_t kSignBit = uint32_t{1} << 31;
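// Negation is a pure sign-bit flip, which also yields the expected result for
// NaN and +-0.0.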
1629 if (dst == src) {
1630 TurboAssembler::Move(kScratchDoubleReg, kSignBit);
1631 Xorps(dst, kScratchDoubleReg);
1632 } else {
1633 TurboAssembler::Move(dst, kSignBit);
1634 Xorps(dst, src);
1635 }
1636 }
1637
emit_f32_ceil(DoubleRegister dst,DoubleRegister src)1638 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
1639 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1640 Roundss(dst, src, kRoundUp);
1641 return true;
1642 }
1643
emit_f32_floor(DoubleRegister dst,DoubleRegister src)1644 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
1645 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1646 Roundss(dst, src, kRoundDown);
1647 return true;
1648 }
1649
emit_f32_trunc(DoubleRegister dst,DoubleRegister src)1650 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
1651 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1652 Roundss(dst, src, kRoundToZero);
1653 return true;
1654 }
1655
emit_f32_nearest_int(DoubleRegister dst,DoubleRegister src)1656 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
1657 DoubleRegister src) {
1658 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1659 Roundss(dst, src, kRoundToNearest);
1660 return true;
1661 }
1662
emit_f32_sqrt(DoubleRegister dst,DoubleRegister src)1663 void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
1664 Sqrtss(dst, src);
1665 }
1666
emit_f64_add(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1667 void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
1668 DoubleRegister rhs) {
1669 if (CpuFeatures::IsSupported(AVX)) {
1670 CpuFeatureScope scope(this, AVX);
1671 vaddsd(dst, lhs, rhs);
1672 } else if (dst == rhs) {
1673 addsd(dst, lhs);
1674 } else {
1675 if (dst != lhs) movsd(dst, lhs);
1676 addsd(dst, rhs);
1677 }
1678 }
1679
emit_f64_sub(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1680 void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
1681 DoubleRegister rhs) {
1682 if (CpuFeatures::IsSupported(AVX)) {
1683 CpuFeatureScope scope(this, AVX);
1684 vsubsd(dst, lhs, rhs);
1685 } else if (dst == rhs) {
1686 movsd(kScratchDoubleReg, rhs);
1687 movsd(dst, lhs);
1688 subsd(dst, kScratchDoubleReg);
1689 } else {
1690 if (dst != lhs) movsd(dst, lhs);
1691 subsd(dst, rhs);
1692 }
1693 }
1694
emit_f64_mul(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1695 void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
1696 DoubleRegister rhs) {
1697 if (CpuFeatures::IsSupported(AVX)) {
1698 CpuFeatureScope scope(this, AVX);
1699 vmulsd(dst, lhs, rhs);
1700 } else if (dst == rhs) {
1701 mulsd(dst, lhs);
1702 } else {
1703 if (dst != lhs) movsd(dst, lhs);
1704 mulsd(dst, rhs);
1705 }
1706 }
1707
emit_f64_div(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1708 void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
1709 DoubleRegister rhs) {
1710 if (CpuFeatures::IsSupported(AVX)) {
1711 CpuFeatureScope scope(this, AVX);
1712 vdivsd(dst, lhs, rhs);
1713 } else if (dst == rhs) {
1714 movsd(kScratchDoubleReg, rhs);
1715 movsd(dst, lhs);
1716 divsd(dst, kScratchDoubleReg);
1717 } else {
1718 if (dst != lhs) movsd(dst, lhs);
1719 divsd(dst, rhs);
1720 }
1721 }
1722
emit_f64_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1723 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
1724 DoubleRegister rhs) {
1725 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
1726 liftoff::MinOrMax::kMin);
1727 }
1728
emit_f64_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1729 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1730 DoubleRegister rhs) {
1731 // Extract sign bit from {rhs} into {kScratchRegister2}.
1732 Movq(liftoff::kScratchRegister2, rhs);
1733 shrq(liftoff::kScratchRegister2, Immediate(63));
1734 shlq(liftoff::kScratchRegister2, Immediate(63));
1735 // Reset sign bit of {lhs} (in {kScratchRegister}).
1736 Movq(kScratchRegister, lhs);
1737 btrq(kScratchRegister, Immediate(63));
1738 // Combine both values into {kScratchRegister} and move into {dst}.
1739 orq(kScratchRegister, liftoff::kScratchRegister2);
1740 Movq(dst, kScratchRegister);
1741 }
1742
emit_f64_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1743 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
1744 DoubleRegister rhs) {
1745 liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
1746 liftoff::MinOrMax::kMax);
1747 }
1748
emit_f64_abs(DoubleRegister dst,DoubleRegister src)1749 void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
1750 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1751 if (dst == src) {
1752 TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
1753 Andpd(dst, kScratchDoubleReg);
1754 } else {
1755 TurboAssembler::Move(dst, kSignBit - 1);
1756 Andpd(dst, src);
1757 }
1758 }
1759
emit_f64_neg(DoubleRegister dst,DoubleRegister src)1760 void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
1761 static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1762 if (dst == src) {
1763 TurboAssembler::Move(kScratchDoubleReg, kSignBit);
1764 Xorpd(dst, kScratchDoubleReg);
1765 } else {
1766 TurboAssembler::Move(dst, kSignBit);
1767 Xorpd(dst, src);
1768 }
1769 }
1770
emit_f64_ceil(DoubleRegister dst,DoubleRegister src)1771 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
1772 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1773 Roundsd(dst, src, kRoundUp);
1774 return true;
1775 }
1776
emit_f64_floor(DoubleRegister dst,DoubleRegister src)1777 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
1778 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1779 Roundsd(dst, src, kRoundDown);
1780 return true;
1781 }
1782
emit_f64_trunc(DoubleRegister dst,DoubleRegister src)1783 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
1784 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1785 Roundsd(dst, src, kRoundToZero);
1786 return true;
1787 }
1788
emit_f64_nearest_int(DoubleRegister dst,DoubleRegister src)1789 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
1790 DoubleRegister src) {
1791 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1792 Roundsd(dst, src, kRoundToNearest);
1793 return true;
1794 }
1795
emit_f64_sqrt(DoubleRegister dst,DoubleRegister src)1796 void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
1797 Sqrtsd(dst, src);
1798 }
1799
1800 namespace liftoff {
1801 #define __ assm->
1802 // Used for float to int conversions. If the value in {converted_back} equals
1803 // {src} afterwards, the conversion succeeded.
1804 template <typename dst_type, typename src_type>
ConvertFloatToIntAndBack(LiftoffAssembler * assm,Register dst,DoubleRegister src,DoubleRegister converted_back)1805 inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
1806 DoubleRegister src,
1807 DoubleRegister converted_back) {
1808 if (std::is_same<double, src_type>::value) { // f64
1809 if (std::is_same<int32_t, dst_type>::value) { // f64 -> i32
1810 __ Cvttsd2si(dst, src);
1811 __ Cvtlsi2sd(converted_back, dst);
1812 } else if (std::is_same<uint32_t, dst_type>::value) { // f64 -> u32
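// Truncate via the signed 64-bit conversion; the zero-extending movl below
// drops the upper half, and the round-trip comparison in the caller rejects
// inputs outside the u32 range.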
1813 __ Cvttsd2siq(dst, src);
1814 __ movl(dst, dst);
1815 __ Cvtqsi2sd(converted_back, dst);
1816 } else if (std::is_same<int64_t, dst_type>::value) { // f64 -> i64
1817 __ Cvttsd2siq(dst, src);
1818 __ Cvtqsi2sd(converted_back, dst);
1819 } else {
1820 UNREACHABLE();
1821 }
1822 } else { // f32
1823 if (std::is_same<int32_t, dst_type>::value) { // f32 -> i32
1824 __ Cvttss2si(dst, src);
1825 __ Cvtlsi2ss(converted_back, dst);
1826 } else if (std::is_same<uint32_t, dst_type>::value) { // f32 -> u32
1827 __ Cvttss2siq(dst, src);
1828 __ movl(dst, dst);
1829 __ Cvtqsi2ss(converted_back, dst);
1830 } else if (std::is_same<int64_t, dst_type>::value) { // f32 -> i64
1831 __ Cvttss2siq(dst, src);
1832 __ Cvtqsi2ss(converted_back, dst);
1833 } else {
1834 UNREACHABLE();
1835 }
1836 }
1837 }
1838
1839 template <typename dst_type, typename src_type>
EmitTruncateFloatToInt(LiftoffAssembler * assm,Register dst,DoubleRegister src,Label * trap)1840 inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
1841 DoubleRegister src, Label* trap) {
1842 if (!CpuFeatures::IsSupported(SSE4_1)) {
1843 __ bailout(kMissingCPUFeature, "no SSE4.1");
1844 return true;
1845 }
1846 CpuFeatureScope feature(assm, SSE4_1);
1847
1848 DoubleRegister rounded = kScratchDoubleReg;
1849 DoubleRegister converted_back = kScratchDoubleReg2;
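// Round toward zero first: for in-range inputs the conversion to int and back
// reproduces {rounded} exactly, while NaNs and out-of-range values fail the
// ucomis check below.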
1850
1851 if (std::is_same<double, src_type>::value) { // f64
1852 __ Roundsd(rounded, src, kRoundToZero);
1853 } else { // f32
1854 __ Roundss(rounded, src, kRoundToZero);
1855 }
1856 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
1857 converted_back);
1858 if (std::is_same<double, src_type>::value) { // f64
1859 __ Ucomisd(converted_back, rounded);
1860 } else { // f32
1861 __ Ucomiss(converted_back, rounded);
1862 }
1863
1864 // Jump to trap if PF is 1 (one of the operands was NaN) or they are not
1865 // equal.
1866 __ j(parity_even, trap);
1867 __ j(not_equal, trap);
1868 return true;
1869 }
1870
1871 template <typename dst_type, typename src_type>
EmitSatTruncateFloatToInt(LiftoffAssembler * assm,Register dst,DoubleRegister src)1872 inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
1873 DoubleRegister src) {
1874 if (!CpuFeatures::IsSupported(SSE4_1)) {
1875 __ bailout(kMissingCPUFeature, "no SSE4.1");
1876 return true;
1877 }
1878 CpuFeatureScope feature(assm, SSE4_1);
1879
1880 Label done;
1881 Label not_nan;
1882 Label src_positive;
1883
1884 DoubleRegister rounded = kScratchDoubleReg;
1885 DoubleRegister converted_back = kScratchDoubleReg2;
1886 DoubleRegister zero_reg = kScratchDoubleReg;
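// Note that {zero_reg} aliases {rounded}; this is fine because {rounded} is no
// longer needed once the out-of-bounds handling below clobbers it.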
1887
1888 if (std::is_same<double, src_type>::value) { // f64
1889 __ Roundsd(rounded, src, kRoundToZero);
1890 } else { // f32
1891 __ Roundss(rounded, src, kRoundToZero);
1892 }
1893
1894 ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
1895 converted_back);
1896 if (std::is_same<double, src_type>::value) { // f64
1897 __ Ucomisd(converted_back, rounded);
1898 } else { // f32
1899 __ Ucomiss(converted_back, rounded);
1900 }
1901
1902 // Return 0 if PF is 1 (one of the operands was NaN).
1903 __ j(parity_odd, &not_nan);
1904 __ xorl(dst, dst);
1905 __ jmp(&done);
1906
1907 __ bind(&not_nan);
1908 // If rounding is as expected, return result
1909 __ j(equal, &done);
1910
1911 __ xorpd(zero_reg, zero_reg);
1912
1913 // if out-of-bounds, check if src is positive
1914 if (std::is_same<double, src_type>::value) { // f64
1915 __ Ucomisd(src, zero_reg);
1916 } else { // f32
1917 __ Ucomiss(src, zero_reg);
1918 }
1919 __ j(above, &src_positive);
1920 if (std::is_same<int32_t, dst_type>::value ||
1921 std::is_same<uint32_t, dst_type>::value) { // i32
1922 __ movl(
1923 dst,
1924 Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::min())));
1925 } else if (std::is_same<int64_t, dst_type>::value) { // i64
1926 __ movq(dst, Immediate64(std::numeric_limits<dst_type>::min()));
1927 } else {
1928 UNREACHABLE();
1929 }
1930 __ jmp(&done);
1931
1932 __ bind(&src_positive);
1933 if (std::is_same<int32_t, dst_type>::value ||
1934 std::is_same<uint32_t, dst_type>::value) { // i32
1935 __ movl(
1936 dst,
1937 Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::max())));
1938 } else if (std::is_same<int64_t, dst_type>::value) { // i64
1939 __ movq(dst, Immediate64(std::numeric_limits<dst_type>::max()));
1940 } else {
1941 UNREACHABLE();
1942 }
1943
1944 __ bind(&done);
1945 return true;
1946 }
1947
1948 template <typename src_type>
EmitSatTruncateFloatToUInt64(LiftoffAssembler * assm,Register dst,DoubleRegister src)1949 inline bool EmitSatTruncateFloatToUInt64(LiftoffAssembler* assm, Register dst,
1950 DoubleRegister src) {
1951 if (!CpuFeatures::IsSupported(SSE4_1)) {
1952 __ bailout(kMissingCPUFeature, "no SSE4.1");
1953 return true;
1954 }
1955 CpuFeatureScope feature(assm, SSE4_1);
1956
1957 Label done;
1958 Label neg_or_nan;
1959 Label overflow;
1960
1961 DoubleRegister zero_reg = kScratchDoubleReg;
1962
1963 __ xorpd(zero_reg, zero_reg);
1964 if (std::is_same<double, src_type>::value) { // f64
1965 __ Ucomisd(src, zero_reg);
1966 } else { // f32
1967 __ Ucomiss(src, zero_reg);
1968 }
1969 // Check if NaN
1970 __ j(parity_even, &neg_or_nan);
1971 __ j(below, &neg_or_nan);
1972 if (std::is_same<double, src_type>::value) { // f64
1973 __ Cvttsd2uiq(dst, src, &overflow);
1974 } else { // f32
1975 __ Cvttss2uiq(dst, src, &overflow);
1976 }
1977 __ jmp(&done);
1978
1979 __ bind(&neg_or_nan);
1980 __ movq(dst, zero_reg);
1981 __ jmp(&done);
1982
1983 __ bind(&overflow);
1984 __ movq(dst, Immediate64(std::numeric_limits<uint64_t>::max()));
1985 __ bind(&done);
1986 return true;
1987 }
1988 #undef __
1989 } // namespace liftoff
1990
emit_type_conversion(WasmOpcode opcode,LiftoffRegister dst,LiftoffRegister src,Label * trap)1991 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
1992 LiftoffRegister dst,
1993 LiftoffRegister src, Label* trap) {
1994 switch (opcode) {
1995 case kExprI32ConvertI64:
1996 movl(dst.gp(), src.gp());
1997 return true;
1998 case kExprI32SConvertF32:
1999 return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2000 src.fp(), trap);
2001 case kExprI32UConvertF32:
2002 return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2003 src.fp(), trap);
2004 case kExprI32SConvertF64:
2005 return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2006 src.fp(), trap);
2007 case kExprI32UConvertF64:
2008 return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2009 src.fp(), trap);
2010 case kExprI32SConvertSatF32:
2011 return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2012 src.fp());
2013 case kExprI32UConvertSatF32:
2014 return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2015 src.fp());
2016 case kExprI32SConvertSatF64:
2017 return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2018 src.fp());
2019 case kExprI32UConvertSatF64:
2020 return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
2021 this, dst.gp(), src.fp());
2022 case kExprI32ReinterpretF32:
2023 Movd(dst.gp(), src.fp());
2024 return true;
2025 case kExprI64SConvertI32:
2026 movsxlq(dst.gp(), src.gp());
2027 return true;
2028 case kExprI64SConvertF32:
2029 return liftoff::EmitTruncateFloatToInt<int64_t, float>(this, dst.gp(),
2030 src.fp(), trap);
2031 case kExprI64UConvertF32: {
2032 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2033 Cvttss2uiq(dst.gp(), src.fp(), trap);
2034 return true;
2035 }
2036 case kExprI64SConvertF64:
2037 return liftoff::EmitTruncateFloatToInt<int64_t, double>(this, dst.gp(),
2038 src.fp(), trap);
2039 case kExprI64UConvertF64: {
2040 RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2041 Cvttsd2uiq(dst.gp(), src.fp(), trap);
2042 return true;
2043 }
2044 case kExprI64SConvertSatF32:
2045 return liftoff::EmitSatTruncateFloatToInt<int64_t, float>(this, dst.gp(),
2046 src.fp());
2047 case kExprI64UConvertSatF32: {
2048 return liftoff::EmitSatTruncateFloatToUInt64<float>(this, dst.gp(),
2049 src.fp());
2050 }
2051 case kExprI64SConvertSatF64:
2052 return liftoff::EmitSatTruncateFloatToInt<int64_t, double>(this, dst.gp(),
2053 src.fp());
2054 case kExprI64UConvertSatF64: {
2055 return liftoff::EmitSatTruncateFloatToUInt64<double>(this, dst.gp(),
2056 src.fp());
2057 }
2058 case kExprI64UConvertI32:
2059 AssertZeroExtended(src.gp());
2060 if (dst.gp() != src.gp()) movl(dst.gp(), src.gp());
2061 return true;
2062 case kExprI64ReinterpretF64:
2063 Movq(dst.gp(), src.fp());
2064 return true;
2065 case kExprF32SConvertI32:
2066 Cvtlsi2ss(dst.fp(), src.gp());
2067 return true;
2068 case kExprF32UConvertI32:
2069 movl(kScratchRegister, src.gp());
2070 Cvtqsi2ss(dst.fp(), kScratchRegister);
2071 return true;
2072 case kExprF32SConvertI64:
2073 Cvtqsi2ss(dst.fp(), src.gp());
2074 return true;
2075 case kExprF32UConvertI64:
2076 Cvtqui2ss(dst.fp(), src.gp());
2077 return true;
2078 case kExprF32ConvertF64:
2079 Cvtsd2ss(dst.fp(), src.fp());
2080 return true;
2081 case kExprF32ReinterpretI32:
2082 Movd(dst.fp(), src.gp());
2083 return true;
2084 case kExprF64SConvertI32:
2085 Cvtlsi2sd(dst.fp(), src.gp());
2086 return true;
2087 case kExprF64UConvertI32:
2088 movl(kScratchRegister, src.gp());
2089 Cvtqsi2sd(dst.fp(), kScratchRegister);
2090 return true;
2091 case kExprF64SConvertI64:
2092 Cvtqsi2sd(dst.fp(), src.gp());
2093 return true;
2094 case kExprF64UConvertI64:
2095 Cvtqui2sd(dst.fp(), src.gp());
2096 return true;
2097 case kExprF64ConvertF32:
2098 Cvtss2sd(dst.fp(), src.fp());
2099 return true;
2100 case kExprF64ReinterpretI64:
2101 Movq(dst.fp(), src.gp());
2102 return true;
2103 default:
2104 UNREACHABLE();
2105 }
2106 }
2107
emit_i32_signextend_i8(Register dst,Register src)2108 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2109 movsxbl(dst, src);
2110 }
2111
emit_i32_signextend_i16(Register dst,Register src)2112 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2113 movsxwl(dst, src);
2114 }
2115
emit_i64_signextend_i8(LiftoffRegister dst,LiftoffRegister src)2116 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2117 LiftoffRegister src) {
2118 movsxbq(dst.gp(), src.gp());
2119 }
2120
emit_i64_signextend_i16(LiftoffRegister dst,LiftoffRegister src)2121 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2122 LiftoffRegister src) {
2123 movsxwq(dst.gp(), src.gp());
2124 }
2125
emit_i64_signextend_i32(LiftoffRegister dst,LiftoffRegister src)2126 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2127 LiftoffRegister src) {
2128 movsxlq(dst.gp(), src.gp());
2129 }
2130
emit_jump(Label * label)2131 void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
2132
emit_jump(Register target)2133 void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2134
emit_cond_jump(LiftoffCondition liftoff_cond,Label * label,ValueKind kind,Register lhs,Register rhs)2135 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2136 Label* label, ValueKind kind,
2137 Register lhs, Register rhs) {
2138 Condition cond = liftoff::ToCondition(liftoff_cond);
2139 if (rhs != no_reg) {
2140 switch (kind) {
2141 case kI32:
2142 cmpl(lhs, rhs);
2143 break;
2144 case kRef:
2145 case kOptRef:
2146 case kRtt:
2147 DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
2148 V8_FALLTHROUGH;
2149 case kI64:
2150 cmpq(lhs, rhs);
2151 break;
2152 default:
2153 UNREACHABLE();
2154 }
2155 } else {
2156 DCHECK_EQ(kind, kI32);
2157 testl(lhs, lhs);
2158 }
2159
2160 j(cond, label);
2161 }
2162
emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,Label * label,Register lhs,int imm)2163 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2164 Label* label, Register lhs,
2165 int imm) {
2166 Condition cond = liftoff::ToCondition(liftoff_cond);
2167 cmpl(lhs, Immediate(imm));
2168 j(cond, label);
2169 }
2170
emit_i32_subi_jump_negative(Register value,int subtrahend,Label * result_negative)2171 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2172 int subtrahend,
2173 Label* result_negative) {
2174 subl(value, Immediate(subtrahend));
2175 j(negative, result_negative);
2176 }
2177
emit_i32_eqz(Register dst,Register src)2178 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2179 testl(src, src);
2180 setcc(equal, dst);
2181 movzxbl(dst, dst);
2182 }
2183
emit_i32_set_cond(LiftoffCondition liftoff_cond,Register dst,Register lhs,Register rhs)2184 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2185 Register dst, Register lhs,
2186 Register rhs) {
2187 Condition cond = liftoff::ToCondition(liftoff_cond);
2188 cmpl(lhs, rhs);
2189 setcc(cond, dst);
2190 movzxbl(dst, dst);
2191 }
2192
emit_i64_eqz(Register dst,LiftoffRegister src)2193 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2194 testq(src.gp(), src.gp());
2195 setcc(equal, dst);
2196 movzxbl(dst, dst);
2197 }
2198
emit_i64_set_cond(LiftoffCondition liftoff_cond,Register dst,LiftoffRegister lhs,LiftoffRegister rhs)2199 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2200 Register dst, LiftoffRegister lhs,
2201 LiftoffRegister rhs) {
2202 Condition cond = liftoff::ToCondition(liftoff_cond);
2203 cmpq(lhs.gp(), rhs.gp());
2204 setcc(cond, dst);
2205 movzxbl(dst, dst);
2206 }
2207
2208 namespace liftoff {
2209 template <void (SharedTurboAssembler::*cmp_op)(DoubleRegister, DoubleRegister)>
EmitFloatSetCond(LiftoffAssembler * assm,Condition cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2210 void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
2211 DoubleRegister lhs, DoubleRegister rhs) {
2212 Label cont;
2213 Label not_nan;
2214
2215 (assm->*cmp_op)(lhs, rhs);
2216 // If PF is one, one of the operands was NaN. This needs special handling.
2217 assm->j(parity_odd, &not_nan, Label::kNear);
2218 // With a NaN operand, only not_equal (f32.ne / f64.ne) returns 1; all other conditions return 0.
2219 if (cond == not_equal) {
2220 assm->movl(dst, Immediate(1));
2221 } else {
2222 assm->xorl(dst, dst);
2223 }
2224 assm->jmp(&cont, Label::kNear);
2225 assm->bind(&not_nan);
2226
2227 assm->setcc(cond, dst);
2228 assm->movzxbl(dst, dst);
2229 assm->bind(&cont);
2230 }
2231 } // namespace liftoff
2232
emit_f32_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2233 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2234 Register dst, DoubleRegister lhs,
2235 DoubleRegister rhs) {
2236 Condition cond = liftoff::ToCondition(liftoff_cond);
2237 liftoff::EmitFloatSetCond<&TurboAssembler::Ucomiss>(this, cond, dst, lhs,
2238 rhs);
2239 }
2240
emit_f64_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2241 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2242 Register dst, DoubleRegister lhs,
2243 DoubleRegister rhs) {
2244 Condition cond = liftoff::ToCondition(liftoff_cond);
2245 liftoff::EmitFloatSetCond<&TurboAssembler::Ucomisd>(this, cond, dst, lhs,
2246 rhs);
2247 }
2248
emit_select(LiftoffRegister dst,Register condition,LiftoffRegister true_value,LiftoffRegister false_value,ValueKind kind)2249 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2250 LiftoffRegister true_value,
2251 LiftoffRegister false_value,
2252 ValueKind kind) {
2253 if (kind != kI32 && kind != kI64) return false;
2254
2255 testl(condition, condition);
2256
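// Use conditional moves: if {dst} already holds {false_value}, overwrite it
// with {true_value} only when the condition is non-zero; otherwise materialize
// {true_value} and conditionally replace it with {false_value}.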
2257 if (kind == kI32) {
2258 if (dst == false_value) {
2259 cmovl(not_zero, dst.gp(), true_value.gp());
2260 } else {
2261 if (dst != true_value) movl(dst.gp(), true_value.gp());
2262 cmovl(zero, dst.gp(), false_value.gp());
2263 }
2264 } else {
2265 if (dst == false_value) {
2266 cmovq(not_zero, dst.gp(), true_value.gp());
2267 } else {
2268 if (dst != true_value) movq(dst.gp(), true_value.gp());
2269 cmovq(zero, dst.gp(), false_value.gp());
2270 }
2271 }
2272
2273 return true;
2274 }
2275
emit_smi_check(Register obj,Label * target,SmiCheckMode mode)2276 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2277 SmiCheckMode mode) {
2278 testb(obj, Immediate(kSmiTagMask));
2279 Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2280 j(condition, target);
2281 }
2282
2283 // TODO(fanchenk): Distinguish mov* if data bypass delay matters.
2284 namespace liftoff {
2285 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2286 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2287 void EmitSimdCommutativeBinOp(
2288 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2289 LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2290 if (CpuFeatures::IsSupported(AVX)) {
2291 CpuFeatureScope scope(assm, AVX);
2292 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2293 return;
2294 }
2295
2296 base::Optional<CpuFeatureScope> sse_scope;
2297 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2298
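// Without AVX the SSE instruction is destructive (dst := dst op src). If {dst}
// aliases {rhs}, just swap the operands; this is only correct because the
// operation is commutative.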
2299 if (dst.fp() == rhs.fp()) {
2300 (assm->*sse_op)(dst.fp(), lhs.fp());
2301 } else {
2302 if (dst.fp() != lhs.fp()) (assm->movaps)(dst.fp(), lhs.fp());
2303 (assm->*sse_op)(dst.fp(), rhs.fp());
2304 }
2305 }
2306
2307 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2308 void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2309 void EmitSimdNonCommutativeBinOp(
2310 LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2311 LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2312 if (CpuFeatures::IsSupported(AVX)) {
2313 CpuFeatureScope scope(assm, AVX);
2314 (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2315 return;
2316 }
2317
2318 base::Optional<CpuFeatureScope> sse_scope;
2319 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2320
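// Non-commutative case: if {dst} aliases {rhs}, save {rhs} to the scratch
// register first, because moving {lhs} into {dst} would clobber it.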
2321 if (dst.fp() == rhs.fp()) {
2322 assm->movaps(kScratchDoubleReg, rhs.fp());
2323 assm->movaps(dst.fp(), lhs.fp());
2324 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2325 } else {
2326 if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2327 (assm->*sse_op)(dst.fp(), rhs.fp());
2328 }
2329 }
2330
2331 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2332 void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
EmitSimdShiftOp(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister operand,LiftoffRegister count)2333 void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
2334 LiftoffRegister operand, LiftoffRegister count) {
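// Wasm defines shift counts modulo the lane width. The masked count is moved
// into an XMM register because the SSE/AVX vector shifts take a variable count
// from an XMM operand.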
2335 constexpr int mask = (1 << width) - 1;
2336 assm->movq(kScratchRegister, count.gp());
2337 assm->andq(kScratchRegister, Immediate(mask));
2338 assm->Movq(kScratchDoubleReg, kScratchRegister);
2339 if (CpuFeatures::IsSupported(AVX)) {
2340 CpuFeatureScope scope(assm, AVX);
2341 (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
2342 } else {
2343 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2344 (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2345 }
2346 }
2347
2348 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
2349 void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
EmitSimdShiftOpImm(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister operand,int32_t count)2350 void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
2351 LiftoffRegister operand, int32_t count) {
2352 constexpr int mask = (1 << width) - 1;
2353 byte shift = static_cast<byte>(count & mask);
2354 if (CpuFeatures::IsSupported(AVX)) {
2355 CpuFeatureScope scope(assm, AVX);
2356 (assm->*avx_op)(dst.fp(), operand.fp(), shift);
2357 } else {
2358 if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2359 (assm->*sse_op)(dst.fp(), shift);
2360 }
2361 }
2362
EmitAnyTrue(LiftoffAssembler * assm,LiftoffRegister dst,LiftoffRegister src)2363 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2364 LiftoffRegister src) {
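// ptest sets ZF iff all 128 bits of {src} are zero, so setcc(not_equal) yields
// 1 exactly when some lane is non-zero.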
2365 assm->xorq(dst.gp(), dst.gp());
2366 assm->Ptest(src.fp(), src.fp());
2367 assm->setcc(not_equal, dst.gp());
2368 }
2369
2370 template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
2371 inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2372 LiftoffRegister src,
2373 base::Optional<CpuFeature> feature = base::nullopt) {
2374 base::Optional<CpuFeatureScope> sse_scope;
2375 if (feature.has_value()) sse_scope.emplace(assm, *feature);
2376
2377 XMMRegister tmp = kScratchDoubleReg;
2378 assm->xorq(dst.gp(), dst.gp());
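// Compare each lane of {src} against zero: {tmp} becomes all-ones in every
// zero lane, so ptest sets ZF (and the result is 1) only if no lane was zero.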
2379 assm->Pxor(tmp, tmp);
2380 (assm->*pcmp)(tmp, src.fp());
2381 assm->Ptest(tmp, tmp);
2382 assm->setcc(equal, dst.gp());
2383 }
2384
2385 } // namespace liftoff
2386
LoadTransform(LiftoffRegister dst,Register src_addr,Register offset_reg,uintptr_t offset_imm,LoadType type,LoadTransformationKind transform,uint32_t * protected_load_pc)2387 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2388 Register offset_reg, uintptr_t offset_imm,
2389 LoadType type,
2390 LoadTransformationKind transform,
2391 uint32_t* protected_load_pc) {
2392 Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
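// Recording the code offset of the load marks it as protected, so the trap
// handler can turn a memory fault at this instruction into a wasm trap.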
2393 *protected_load_pc = pc_offset();
2394 MachineType memtype = type.mem_type();
2395 if (transform == LoadTransformationKind::kExtend) {
2396 if (memtype == MachineType::Int8()) {
2397 Pmovsxbw(dst.fp(), src_op);
2398 } else if (memtype == MachineType::Uint8()) {
2399 Pmovzxbw(dst.fp(), src_op);
2400 } else if (memtype == MachineType::Int16()) {
2401 Pmovsxwd(dst.fp(), src_op);
2402 } else if (memtype == MachineType::Uint16()) {
2403 Pmovzxwd(dst.fp(), src_op);
2404 } else if (memtype == MachineType::Int32()) {
2405 Pmovsxdq(dst.fp(), src_op);
2406 } else if (memtype == MachineType::Uint32()) {
2407 Pmovzxdq(dst.fp(), src_op);
2408 }
2409 } else if (transform == LoadTransformationKind::kZeroExtend) {
2410 if (memtype == MachineType::Int32()) {
2411 Movss(dst.fp(), src_op);
2412 } else {
2413 DCHECK_EQ(MachineType::Int64(), memtype);
2414 Movsd(dst.fp(), src_op);
2415 }
2416 } else {
2417 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2418 if (memtype == MachineType::Int8()) {
2419 S128Load8Splat(dst.fp(), src_op, kScratchDoubleReg);
2420 } else if (memtype == MachineType::Int16()) {
2421 S128Load16Splat(dst.fp(), src_op, kScratchDoubleReg);
2422 } else if (memtype == MachineType::Int32()) {
2423 S128Load32Splat(dst.fp(), src_op);
2424 } else if (memtype == MachineType::Int64()) {
2425 Movddup(dst.fp(), src_op);
2426 }
2427 }
2428 }
2429
LoadLane(LiftoffRegister dst,LiftoffRegister src,Register addr,Register offset_reg,uintptr_t offset_imm,LoadType type,uint8_t laneidx,uint32_t * protected_load_pc)2430 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2431 Register addr, Register offset_reg,
2432 uintptr_t offset_imm, LoadType type,
2433 uint8_t laneidx, uint32_t* protected_load_pc) {
2434 Operand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);
2435
2436 MachineType mem_type = type.mem_type();
2437 if (mem_type == MachineType::Int8()) {
2438 Pinsrb(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2439 } else if (mem_type == MachineType::Int16()) {
2440 Pinsrw(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2441 } else if (mem_type == MachineType::Int32()) {
2442 Pinsrd(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2443 } else {
2444 DCHECK_EQ(MachineType::Int64(), mem_type);
2445 Pinsrq(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2446 }
2447 }
2448
StoreLane(Register dst,Register offset,uintptr_t offset_imm,LiftoffRegister src,StoreType type,uint8_t lane,uint32_t * protected_store_pc)2449 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2450 uintptr_t offset_imm, LiftoffRegister src,
2451 StoreType type, uint8_t lane,
2452 uint32_t* protected_store_pc) {
2453 Operand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
2454 if (protected_store_pc) *protected_store_pc = pc_offset();
2455 MachineRepresentation rep = type.mem_rep();
2456 if (rep == MachineRepresentation::kWord8) {
2457 Pextrb(dst_op, src.fp(), lane);
2458 } else if (rep == MachineRepresentation::kWord16) {
2459 Pextrw(dst_op, src.fp(), lane);
2460 } else if (rep == MachineRepresentation::kWord32) {
2461 S128Store32Lane(dst_op, src.fp(), lane);
2462 } else {
2463 DCHECK_EQ(MachineRepresentation::kWord64, rep);
2464 S128Store64Lane(dst_op, src.fp(), lane);
2465 }
2466 }
2467
emit_i8x16_shuffle(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,const uint8_t shuffle[16],bool is_swizzle)2468 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2469 LiftoffRegister lhs,
2470 LiftoffRegister rhs,
2471 const uint8_t shuffle[16],
2472 bool is_swizzle) {
2473 if (is_swizzle) {
2474 uint32_t imms[4];
2475 // Shuffles that use just one operand are called swizzles; {rhs} can be ignored.
2476 wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
2477 TurboAssembler::Move(kScratchDoubleReg, make_uint64(imms[3], imms[2]),
2478 make_uint64(imms[1], imms[0]));
2479 Pshufb(dst.fp(), lhs.fp(), kScratchDoubleReg);
2480 return;
2481 }
2482
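// Build a pshufb mask that selects the {lhs} lanes: indices >= 16 (lanes taken
// from {rhs}) are replaced by 0x80, which pshufb turns into zero. The second
// mask below does the same for the {rhs} lanes; the two results are OR-ed.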
2483 uint64_t mask1[2] = {};
2484 for (int i = 15; i >= 0; i--) {
2485 uint8_t lane = shuffle[i];
2486 int j = i >> 3;
2487 mask1[j] <<= 8;
2488 mask1[j] |= lane < kSimd128Size ? lane : 0x80;
2489 }
2490 TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask1[1], mask1[0]);
2491 Pshufb(kScratchDoubleReg, lhs.fp(), liftoff::kScratchDoubleReg2);
2492
2493 uint64_t mask2[2] = {};
2494 for (int i = 15; i >= 0; i--) {
2495 uint8_t lane = shuffle[i];
2496 int j = i >> 3;
2497 mask2[j] <<= 8;
2498 mask2[j] |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
2499 }
2500 TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask2[1], mask2[0]);
2501
2502 Pshufb(dst.fp(), rhs.fp(), liftoff::kScratchDoubleReg2);
2503 Por(dst.fp(), kScratchDoubleReg);
2504 }
2505
emit_i8x16_swizzle(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2506 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2507 LiftoffRegister lhs,
2508 LiftoffRegister rhs) {
2509 I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg,
2510 kScratchRegister);
2511 }
2512
emit_i8x16_popcnt(LiftoffRegister dst,LiftoffRegister src)2513 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2514 LiftoffRegister src) {
2515 I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
2516 liftoff::kScratchDoubleReg2, kScratchRegister);
2517 }
2518
emit_i8x16_splat(LiftoffRegister dst,LiftoffRegister src)2519 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2520 LiftoffRegister src) {
2521 I8x16Splat(dst.fp(), src.gp(), kScratchDoubleReg);
2522 }
2523
emit_i16x8_splat(LiftoffRegister dst,LiftoffRegister src)2524 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2525 LiftoffRegister src) {
2526 I16x8Splat(dst.fp(), src.gp());
2527 }
2528
emit_i32x4_splat(LiftoffRegister dst,LiftoffRegister src)2529 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2530 LiftoffRegister src) {
2531 Movd(dst.fp(), src.gp());
2532 Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
2533 }
2534
emit_i64x2_splat(LiftoffRegister dst,LiftoffRegister src)2535 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2536 LiftoffRegister src) {
2537 Movq(dst.fp(), src.gp());
2538 Movddup(dst.fp(), dst.fp());
2539 }
2540
emit_f32x4_splat(LiftoffRegister dst,LiftoffRegister src)2541 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2542 LiftoffRegister src) {
2543 F32x4Splat(dst.fp(), src.fp());
2544 }
2545
emit_f64x2_splat(LiftoffRegister dst,LiftoffRegister src)2546 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2547 LiftoffRegister src) {
2548 Movddup(dst.fp(), src.fp());
2549 }
2550
emit_i8x16_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2551 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2552 LiftoffRegister rhs) {
2553 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2554 this, dst, lhs, rhs);
2555 }
2556
emit_i8x16_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2557 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2558 LiftoffRegister rhs) {
2559 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2560 this, dst, lhs, rhs);
2561 Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2562 Pxor(dst.fp(), kScratchDoubleReg);
2563 }
2564
emit_i8x16_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2565 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2566 LiftoffRegister rhs) {
2567 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
2568 &Assembler::pcmpgtb>(this, dst, lhs,
2569 rhs);
2570 }
2571
emit_i8x16_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2572 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2573 LiftoffRegister rhs) {
2574 DoubleRegister ref = rhs.fp();
2575 if (dst == rhs) {
2576 Movaps(kScratchDoubleReg, rhs.fp());
2577 ref = kScratchDoubleReg;
2578 }
2579 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
2580 this, dst, lhs, rhs);
2581 Pcmpeqb(dst.fp(), ref);
2582 Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2583 Pxor(dst.fp(), kScratchDoubleReg);
2584 }
2585
emit_i8x16_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2586 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2587 LiftoffRegister rhs) {
2588 DoubleRegister ref = rhs.fp();
2589 if (dst == rhs) {
2590 Movaps(kScratchDoubleReg, rhs.fp());
2591 ref = kScratchDoubleReg;
2592 }
2593 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2594 this, dst, lhs, rhs, SSE4_1);
2595 Pcmpeqb(dst.fp(), ref);
2596 }
2597
emit_i8x16_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2598 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2599 LiftoffRegister rhs) {
2600 DoubleRegister ref = rhs.fp();
2601 if (dst == rhs) {
2602 Movaps(kScratchDoubleReg, rhs.fp());
2603 ref = kScratchDoubleReg;
2604 }
2605 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
2606 this, dst, lhs, rhs);
2607 Pcmpeqb(dst.fp(), ref);
2608 }
2609
emit_i16x8_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2610 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2611 LiftoffRegister rhs) {
2612 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2613 this, dst, lhs, rhs);
2614 }
2615
emit_i16x8_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2616 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2617 LiftoffRegister rhs) {
2618 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2619 this, dst, lhs, rhs);
2620 Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2621 Pxor(dst.fp(), kScratchDoubleReg);
2622 }
2623
emit_i16x8_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2624 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2625 LiftoffRegister rhs) {
2626 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
2627 &Assembler::pcmpgtw>(this, dst, lhs,
2628 rhs);
2629 }
2630
emit_i16x8_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2631 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2632 LiftoffRegister rhs) {
2633 DoubleRegister ref = rhs.fp();
2634 if (dst == rhs) {
2635 Movaps(kScratchDoubleReg, rhs.fp());
2636 ref = kScratchDoubleReg;
2637 }
2638 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
2639 this, dst, lhs, rhs, SSE4_1);
2640 Pcmpeqw(dst.fp(), ref);
2641 Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2642 Pxor(dst.fp(), kScratchDoubleReg);
2643 }
2644
emit_i16x8_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2645 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2646 LiftoffRegister rhs) {
2647 DoubleRegister ref = rhs.fp();
2648 if (dst == rhs) {
2649 Movaps(kScratchDoubleReg, rhs.fp());
2650 ref = kScratchDoubleReg;
2651 }
2652 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
2653 this, dst, lhs, rhs);
2654 Pcmpeqw(dst.fp(), ref);
2655 }
2656
emit_i16x8_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2657 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2658 LiftoffRegister rhs) {
2659 DoubleRegister ref = rhs.fp();
2660 if (dst == rhs) {
2661 Movaps(kScratchDoubleReg, rhs.fp());
2662 ref = kScratchDoubleReg;
2663 }
2664 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
2665 this, dst, lhs, rhs, SSE4_1);
2666 Pcmpeqw(dst.fp(), ref);
2667 }
2668
emit_i32x4_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2669 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2670 LiftoffRegister rhs) {
2671 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
2672 this, dst, lhs, rhs);
2673 }
2674
emit_i32x4_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2675 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2676 LiftoffRegister rhs) {
2677 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
2678 this, dst, lhs, rhs);
2679 Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2680 Pxor(dst.fp(), kScratchDoubleReg);
2681 }
2682
emit_i32x4_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2683 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2684 LiftoffRegister rhs) {
2685 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
2686 &Assembler::pcmpgtd>(this, dst, lhs,
2687 rhs);
2688 }
2689
emit_i32x4_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2690 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2691 LiftoffRegister rhs) {
2692 DoubleRegister ref = rhs.fp();
2693 if (dst == rhs) {
2694 Movaps(kScratchDoubleReg, rhs.fp());
2695 ref = kScratchDoubleReg;
2696 }
2697 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
2698 this, dst, lhs, rhs, SSE4_1);
2699 Pcmpeqd(dst.fp(), ref);
2700 Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2701 Pxor(dst.fp(), kScratchDoubleReg);
2702 }
2703
emit_i32x4_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2704 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2705 LiftoffRegister rhs) {
2706 DoubleRegister ref = rhs.fp();
2707 if (dst == rhs) {
2708 Movaps(kScratchDoubleReg, rhs.fp());
2709 ref = kScratchDoubleReg;
2710 }
2711 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
2712 this, dst, lhs, rhs, SSE4_1);
2713 Pcmpeqd(dst.fp(), ref);
2714 }
2715
emit_i32x4_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2716 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2717 LiftoffRegister rhs) {
2718 DoubleRegister ref = rhs.fp();
2719 if (dst == rhs) {
2720 Movaps(kScratchDoubleReg, rhs.fp());
2721 ref = kScratchDoubleReg;
2722 }
2723 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
2724 this, dst, lhs, rhs, SSE4_1);
2725 Pcmpeqd(dst.fp(), ref);
2726 }
2727
emit_i64x2_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2728 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2729 LiftoffRegister rhs) {
2730 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
2731 this, dst, lhs, rhs, SSE4_1);
2732 }
2733
emit_i64x2_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2734 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2735 LiftoffRegister rhs) {
2736 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
2737 this, dst, lhs, rhs, SSE4_1);
2738 Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
2739 Pxor(dst.fp(), kScratchDoubleReg);
2740 }
2741
emit_i64x2_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2742 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2743 LiftoffRegister rhs) {
2744 // Different register alias requirements depending on CpuFeatures supported:
2745 if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
2746 // 1. AVX or SSE4_2: no extra requirements (I64x2GtS takes care of aliasing).
2747 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2748 } else {
2749 // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
2750 if (dst == lhs || dst == rhs) {
2751 I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2752 kScratchDoubleReg);
2753 movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2754 } else {
2755 I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2756 }
2757 }
2758 }
2759
emit_i64x2_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2760 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2761 LiftoffRegister rhs) {
2762 // Different register alias requirements depending on CpuFeatures supported:
2763 if (CpuFeatures::IsSupported(AVX)) {
2764 // 1. AVX, no requirements.
2765 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2766 } else if (CpuFeatures::IsSupported(SSE4_2)) {
2767 // 2. SSE4_2, dst != lhs.
2768 if (dst == lhs) {
2769 I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2770 kScratchDoubleReg);
2771 movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2772 } else {
2773 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2774 }
2775 } else {
2776 // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
2777 if (dst == lhs || dst == rhs) {
2778 I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2779 kScratchDoubleReg);
2780 movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2781 } else {
2782 I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2783 }
2784 }
2785 }
2786
emit_f32x4_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2787 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2788 LiftoffRegister rhs) {
2789 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
2790 this, dst, lhs, rhs);
2791 }
2792
emit_f32x4_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2793 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2794 LiftoffRegister rhs) {
2795 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
2796 &Assembler::cmpneqps>(this, dst, lhs, rhs);
2797 }
2798
emit_f32x4_lt(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2799 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
2800 LiftoffRegister rhs) {
2801 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
2802 &Assembler::cmpltps>(this, dst, lhs,
2803 rhs);
2804 }
2805
emit_f32x4_le(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2806 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
2807 LiftoffRegister rhs) {
2808 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
2809 &Assembler::cmpleps>(this, dst, lhs,
2810 rhs);
2811 }
2812
emit_f64x2_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2813 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2814 LiftoffRegister rhs) {
2815 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
2816 this, dst, lhs, rhs);
2817 }
2818
emit_f64x2_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2819 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2820 LiftoffRegister rhs) {
2821 liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
2822 &Assembler::cmpneqpd>(this, dst, lhs, rhs);
2823 }
2824
emit_f64x2_lt(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2825 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
2826 LiftoffRegister rhs) {
2827 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
2828 &Assembler::cmpltpd>(this, dst, lhs,
2829 rhs);
2830 }
2831
emit_f64x2_le(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2832 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
2833 LiftoffRegister rhs) {
2834 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
2835 &Assembler::cmplepd>(this, dst, lhs,
2836 rhs);
2837 }
2838
emit_s128_const(LiftoffRegister dst,const uint8_t imms[16])2839 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
2840 const uint8_t imms[16]) {
2841 uint64_t vals[2];
2842 memcpy(vals, imms, sizeof(vals));
2843 TurboAssembler::Move(dst.fp(), vals[1], vals[0]);
2844 }
2845
emit_s128_not(LiftoffRegister dst,LiftoffRegister src)2846 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
2847 S128Not(dst.fp(), src.fp(), kScratchDoubleReg);
2848 }
2849
emit_s128_and(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2850 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
2851 LiftoffRegister rhs) {
2852 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
2853 this, dst, lhs, rhs);
2854 }
2855
emit_s128_or(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2856 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
2857 LiftoffRegister rhs) {
2858 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
2859 this, dst, lhs, rhs);
2860 }
2861
emit_s128_xor(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2862 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
2863 LiftoffRegister rhs) {
2864 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
2865 this, dst, lhs, rhs);
2866 }
2867
emit_s128_select(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,LiftoffRegister mask)2868 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
2869 LiftoffRegister src1,
2870 LiftoffRegister src2,
2871 LiftoffRegister mask) {
2872 // Ensure that we don't overwrite any inputs with the movaps below.
2873 DCHECK_NE(dst, src1);
2874 DCHECK_NE(dst, src2);
2875 if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
2876 movaps(dst.fp(), mask.fp());
2877 S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
2878 } else {
2879 S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
2880 }
2881 }
2882
emit_i8x16_neg(LiftoffRegister dst,LiftoffRegister src)2883 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
2884 LiftoffRegister src) {
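// psignb with an all-ones operand negates each lane in place; otherwise
// compute 0 - src into the fresh destination register.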
2885 if (dst.fp() == src.fp()) {
2886 Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2887 Psignb(dst.fp(), kScratchDoubleReg);
2888 } else {
2889 Pxor(dst.fp(), dst.fp());
2890 Psubb(dst.fp(), src.fp());
2891 }
2892 }
2893
emit_v128_anytrue(LiftoffRegister dst,LiftoffRegister src)2894 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
2895 LiftoffRegister src) {
2896 liftoff::EmitAnyTrue(this, dst, src);
2897 }
2898
emit_i8x16_alltrue(LiftoffRegister dst,LiftoffRegister src)2899 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
2900 LiftoffRegister src) {
2901 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
2902 }
2903
emit_i8x16_bitmask(LiftoffRegister dst,LiftoffRegister src)2904 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
2905 LiftoffRegister src) {
2906 Pmovmskb(dst.gp(), src.fp());
2907 }
2908
emit_i8x16_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2909 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
2910 LiftoffRegister rhs) {
2911 I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2912 liftoff::kScratchDoubleReg2);
2913 }
2914
emit_i8x16_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2915 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
2916 int32_t rhs) {
2917 I8x16Shl(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
2918 }
2919
emit_i8x16_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2920 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
2921 LiftoffRegister lhs,
2922 LiftoffRegister rhs) {
2923 I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2924 liftoff::kScratchDoubleReg2);
2925 }
2926
emit_i8x16_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2927 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
2928 LiftoffRegister lhs, int32_t rhs) {
2929 I8x16ShrS(dst.fp(), lhs.fp(), rhs, kScratchDoubleReg);
2930 }
2931
emit_i8x16_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2932 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
2933 LiftoffRegister lhs,
2934 LiftoffRegister rhs) {
2935 I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2936 liftoff::kScratchDoubleReg2);
2937 }
2938
emit_i8x16_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2939 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
2940 LiftoffRegister lhs, int32_t rhs) {
2941 I8x16ShrU(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
2942 }
2943
emit_i8x16_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2944 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
2945 LiftoffRegister rhs) {
2946 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
2947 this, dst, lhs, rhs);
2948 }
2949
emit_i8x16_add_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2950 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
2951 LiftoffRegister lhs,
2952 LiftoffRegister rhs) {
2953 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
2954 this, dst, lhs, rhs);
2955 }
2956
emit_i8x16_add_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2957 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
2958 LiftoffRegister lhs,
2959 LiftoffRegister rhs) {
2960 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
2961 this, dst, lhs, rhs);
2962 }
2963
emit_i8x16_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2964 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
2965 LiftoffRegister rhs) {
2966 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
2967 this, dst, lhs, rhs);
2968 }
2969
emit_i8x16_sub_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2970 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
2971 LiftoffRegister lhs,
2972 LiftoffRegister rhs) {
2973 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
2974 this, dst, lhs, rhs);
2975 }
2976
emit_i8x16_sub_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2977 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
2978 LiftoffRegister lhs,
2979 LiftoffRegister rhs) {
2980 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
2981 &Assembler::psubusb>(this, dst, lhs,
2982 rhs);
2983 }
2984
emit_i8x16_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2985 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
2986 LiftoffRegister lhs,
2987 LiftoffRegister rhs) {
2988 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2989 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
2990 }
2991
emit_i8x16_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2992 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
2993 LiftoffRegister lhs,
2994 LiftoffRegister rhs) {
2995 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
2996 this, dst, lhs, rhs);
2997 }
2998
emit_i8x16_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2999 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3000 LiftoffRegister lhs,
3001 LiftoffRegister rhs) {
3002 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
3003 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3004 }
3005
emit_i8x16_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3006 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3007 LiftoffRegister lhs,
3008 LiftoffRegister rhs) {
3009 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3010 this, dst, lhs, rhs);
3011 }
3012
emit_i16x8_neg(LiftoffRegister dst,LiftoffRegister src)3013 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3014 LiftoffRegister src) {
3015 if (dst.fp() == src.fp()) {
3016 Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3017 Psignw(dst.fp(), kScratchDoubleReg);
3018 } else {
3019 Pxor(dst.fp(), dst.fp());
3020 Psubw(dst.fp(), src.fp());
3021 }
3022 }
3023
emit_i16x8_alltrue(LiftoffRegister dst,LiftoffRegister src)3024 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3025 LiftoffRegister src) {
3026 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
3027 }
3028
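// Packsswb packs the eight word lanes of src into the upper eight bytes of
// the scratch register, preserving their sign bits. Pmovmskb then gathers all
// sixteen byte sign bits, and the shift by 8 keeps only the bits from src.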
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  XMMRegister tmp = kScratchDoubleReg;
  Packsswb(tmp, src.fp());
  Pmovmskb(dst.gp(), tmp);
  shrq(dst.gp(), Immediate(8));
}
3036
emit_i16x8_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3037 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3038 LiftoffRegister rhs) {
3039 liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
3040 lhs, rhs);
3041 }
3042
emit_i16x8_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3043 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3044 int32_t rhs) {
3045 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
3046 this, dst, lhs, rhs);
3047 }
3048
emit_i16x8_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3049 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3050 LiftoffRegister lhs,
3051 LiftoffRegister rhs) {
3052 liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
3053 lhs, rhs);
3054 }
3055
emit_i16x8_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3056 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3057 LiftoffRegister lhs, int32_t rhs) {
3058 liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
3059 this, dst, lhs, rhs);
3060 }
3061
emit_i16x8_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3062 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3063 LiftoffRegister lhs,
3064 LiftoffRegister rhs) {
3065 liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
3066 lhs, rhs);
3067 }
3068
emit_i16x8_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3069 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3070 LiftoffRegister lhs, int32_t rhs) {
3071 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
3072 this, dst, lhs, rhs);
3073 }
3074
emit_i16x8_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3075 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3076 LiftoffRegister rhs) {
3077 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
3078 this, dst, lhs, rhs);
3079 }
3080
emit_i16x8_add_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3081 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3082 LiftoffRegister lhs,
3083 LiftoffRegister rhs) {
3084 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
3085 this, dst, lhs, rhs);
3086 }
3087
emit_i16x8_add_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3088 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3089 LiftoffRegister lhs,
3090 LiftoffRegister rhs) {
3091 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
3092 this, dst, lhs, rhs);
3093 }
3094
emit_i16x8_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3095 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3096 LiftoffRegister rhs) {
3097 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
3098 this, dst, lhs, rhs);
3099 }
3100
emit_i16x8_sub_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3101 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3102 LiftoffRegister lhs,
3103 LiftoffRegister rhs) {
3104 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
3105 this, dst, lhs, rhs);
3106 }
3107
emit_i16x8_sub_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3108 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3109 LiftoffRegister lhs,
3110 LiftoffRegister rhs) {
3111 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
3112 &Assembler::psubusw>(this, dst, lhs,
3113 rhs);
3114 }
3115
emit_i16x8_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3116 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3117 LiftoffRegister rhs) {
3118 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
3119 this, dst, lhs, rhs);
3120 }
3121
emit_i16x8_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3122 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3123 LiftoffRegister lhs,
3124 LiftoffRegister rhs) {
3125 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3126 this, dst, lhs, rhs);
3127 }
3128
emit_i16x8_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3129 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3130 LiftoffRegister lhs,
3131 LiftoffRegister rhs) {
3132 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3133 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3134 }
3135
emit_i16x8_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3136 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3137 LiftoffRegister lhs,
3138 LiftoffRegister rhs) {
3139 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
3140 this, dst, lhs, rhs);
3141 }
3142
emit_i16x8_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3143 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3144 LiftoffRegister lhs,
3145 LiftoffRegister rhs) {
3146 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3147 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3148 }
3149
emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,LiftoffRegister src)3150 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3151 LiftoffRegister src) {
3152 I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
3153 kScratchRegister);
3154 }
3155
emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,LiftoffRegister src)3156 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3157 LiftoffRegister src) {
3158 I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
3159 }
3160
emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3161 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3162 LiftoffRegister src1,
3163 LiftoffRegister src2) {
3164 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
3165 /*is_signed=*/true);
3166 }
3167
emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3168 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3169 LiftoffRegister src1,
3170 LiftoffRegister src2) {
3171 I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
3172 /*is_signed=*/false);
3173 }
3174
emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3175 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3176 LiftoffRegister src1,
3177 LiftoffRegister src2) {
3178 I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3179 }
3180
emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3181 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3182 LiftoffRegister src1,
3183 LiftoffRegister src2) {
3184 I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3185 }
3186
emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3187 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3188 LiftoffRegister src1,
3189 LiftoffRegister src2) {
3190 I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3191 }
3192
emit_i32x4_neg(LiftoffRegister dst,LiftoffRegister src)3193 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3194 LiftoffRegister src) {
3195 if (dst.fp() == src.fp()) {
3196 Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3197 Psignd(dst.fp(), kScratchDoubleReg);
3198 } else {
3199 Pxor(dst.fp(), dst.fp());
3200 Psubd(dst.fp(), src.fp());
3201 }
3202 }
3203
emit_i32x4_alltrue(LiftoffRegister dst,LiftoffRegister src)3204 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3205 LiftoffRegister src) {
3206 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
3207 }
3208
emit_i32x4_bitmask(LiftoffRegister dst,LiftoffRegister src)3209 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3210 LiftoffRegister src) {
3211 Movmskps(dst.gp(), src.fp());
3212 }
3213
emit_i32x4_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3214 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3215 LiftoffRegister rhs) {
3216 liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
3217 lhs, rhs);
3218 }
3219
emit_i32x4_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3220 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3221 int32_t rhs) {
3222 liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
3223 this, dst, lhs, rhs);
3224 }
3225
emit_i32x4_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3226 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3227 LiftoffRegister lhs,
3228 LiftoffRegister rhs) {
3229 liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
3230 lhs, rhs);
3231 }
3232
emit_i32x4_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3233 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3234 LiftoffRegister lhs, int32_t rhs) {
3235 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
3236 this, dst, lhs, rhs);
3237 }
3238
emit_i32x4_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3239 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3240 LiftoffRegister lhs,
3241 LiftoffRegister rhs) {
3242 liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
3243 lhs, rhs);
3244 }
3245
emit_i32x4_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3246 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3247 LiftoffRegister lhs, int32_t rhs) {
3248 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
3249 this, dst, lhs, rhs);
3250 }
3251
emit_i32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3252 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3253 LiftoffRegister rhs) {
3254 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3255 this, dst, lhs, rhs);
3256 }
3257
emit_i32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3258 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3259 LiftoffRegister rhs) {
3260 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3261 this, dst, lhs, rhs);
3262 }
3263
emit_i32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3264 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3265 LiftoffRegister rhs) {
3266 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3267 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3268 }
3269
emit_i32x4_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3270 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3271 LiftoffRegister lhs,
3272 LiftoffRegister rhs) {
3273 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3274 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3275 }
3276
emit_i32x4_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3277 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3278 LiftoffRegister lhs,
3279 LiftoffRegister rhs) {
3280 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3281 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3282 }
3283
emit_i32x4_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3284 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3285 LiftoffRegister lhs,
3286 LiftoffRegister rhs) {
3287 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
3288 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3289 }
3290
emit_i32x4_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3291 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3292 LiftoffRegister lhs,
3293 LiftoffRegister rhs) {
3294 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3295 this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3296 }
3297
emit_i32x4_dot_i16x8_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3298 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3299 LiftoffRegister lhs,
3300 LiftoffRegister rhs) {
3301 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
3302 this, dst, lhs, rhs);
3303 }
3304
emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,LiftoffRegister src)3305 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3306 LiftoffRegister src) {
3307 I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
3308 }
3309
emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,LiftoffRegister src)3310 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3311 LiftoffRegister src) {
3312 I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
3313 }
3314
namespace liftoff {
// Helper function that checks for register aliasing and AVX support, and
// moves registers around before calling the actual macro-assembler function.
inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
                              XMMRegister src1, XMMRegister src2, bool low,
                              bool is_signed) {
  // I32x4ExtMul requires dst == src1 if AVX is not supported.
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    assm->I32x4ExtMul(dst, src1, src2, kScratchDoubleReg, low, is_signed);
  } else if (dst != src2) {
    // dst != src1 && dst != src2
    assm->movaps(dst, src1);
    assm->I32x4ExtMul(dst, dst, src2, kScratchDoubleReg, low, is_signed);
  } else {
    // dst == src2
    // Extended multiplication is commutative, so the operands can be swapped.
    assm->movaps(dst, src2);
    assm->I32x4ExtMul(dst, dst, src1, kScratchDoubleReg, low, is_signed);
  }
}
}  // namespace liftoff
3336
emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3337 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
3338 LiftoffRegister src1,
3339 LiftoffRegister src2) {
3340 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3341 /*is_signed=*/true);
3342 }
3343
emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3344 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
3345 LiftoffRegister src1,
3346 LiftoffRegister src2) {
3347 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3348 /*is_signed=*/false);
3349 }
3350
emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3351 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
3352 LiftoffRegister src1,
3353 LiftoffRegister src2) {
3354 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3355 /*low=*/false,
3356 /*is_signed=*/true);
3357 }
3358
emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3359 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
3360 LiftoffRegister src1,
3361 LiftoffRegister src2) {
3362 liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3363 /*low=*/false,
3364 /*is_signed=*/false);
3365 }
3366
emit_i64x2_neg(LiftoffRegister dst,LiftoffRegister src)3367 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
3368 LiftoffRegister src) {
3369 I64x2Neg(dst.fp(), src.fp(), kScratchDoubleReg);
3370 }
3371
emit_i64x2_alltrue(LiftoffRegister dst,LiftoffRegister src)3372 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
3373 LiftoffRegister src) {
3374 liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
3375 }
3376
emit_i64x2_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3377 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
3378 LiftoffRegister rhs) {
3379 liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
3380 lhs, rhs);
3381 }
3382
emit_i64x2_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3383 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
3384 int32_t rhs) {
3385 liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
3386 this, dst, lhs, rhs);
3387 }
3388
emit_i64x2_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3389 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
3390 LiftoffRegister lhs,
3391 LiftoffRegister rhs) {
3392 I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchDoubleReg,
3393 liftoff::kScratchDoubleReg2, kScratchRegister);
3394 }
3395
emit_i64x2_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3396 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
3397 LiftoffRegister lhs, int32_t rhs) {
3398 I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, kScratchDoubleReg);
3399 }
3400
emit_i64x2_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3401 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
3402 LiftoffRegister lhs,
3403 LiftoffRegister rhs) {
3404 liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
3405 lhs, rhs);
3406 }
3407
emit_i64x2_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3408 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
3409 LiftoffRegister lhs, int32_t rhs) {
3410 liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
3411 this, dst, lhs, rhs);
3412 }
3413
emit_i64x2_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3414 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
3415 LiftoffRegister rhs) {
3416 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
3417 this, dst, lhs, rhs);
3418 }
3419
emit_i64x2_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3420 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
3421 LiftoffRegister rhs) {
3422 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
3423 this, dst, lhs, rhs);
3424 }
3425
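// There is no single SSE instruction for a 64x64-bit lane multiply, so
// I64x2Mul assembles it from 32-bit partial products and needs two extra
// SIMD temporaries besides dst, lhs and rhs.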
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
  I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
}
3435
emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3436 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
3437 LiftoffRegister src1,
3438 LiftoffRegister src2) {
3439 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
3440 /*is_signed=*/true);
3441 }
3442
emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3443 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
3444 LiftoffRegister src1,
3445 LiftoffRegister src2) {
3446 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
3447 /*is_signed=*/false);
3448 }
3449
emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3450 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
3451 LiftoffRegister src1,
3452 LiftoffRegister src2) {
3453 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
3454 /*is_signed=*/true);
3455 }
3456
emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3457 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
3458 LiftoffRegister src1,
3459 LiftoffRegister src2) {
3460 I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
3461 /*is_signed=*/false);
3462 }
3463
emit_i64x2_bitmask(LiftoffRegister dst,LiftoffRegister src)3464 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
3465 LiftoffRegister src) {
3466 Movmskpd(dst.gp(), src.fp());
3467 }
3468
emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,LiftoffRegister src)3469 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
3470 LiftoffRegister src) {
3471 Pmovsxdq(dst.fp(), src.fp());
3472 }
3473
emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,LiftoffRegister src)3474 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
3475 LiftoffRegister src) {
3476 I64x2SConvertI32x4High(dst.fp(), src.fp());
3477 }
3478
emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,LiftoffRegister src)3479 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
3480 LiftoffRegister src) {
3481 Pmovzxdq(dst.fp(), src.fp());
3482 }
3483
emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,LiftoffRegister src)3484 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
3485 LiftoffRegister src) {
3486 I64x2UConvertI32x4High(dst.fp(), src.fp(), kScratchDoubleReg);
3487 }
3488
emit_f32x4_abs(LiftoffRegister dst,LiftoffRegister src)3489 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
3490 LiftoffRegister src) {
3491 Absps(dst.fp(), src.fp(), kScratchRegister);
3492 }
3493
emit_f32x4_neg(LiftoffRegister dst,LiftoffRegister src)3494 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
3495 LiftoffRegister src) {
3496 Negps(dst.fp(), src.fp(), kScratchRegister);
3497 }
3498
emit_f32x4_sqrt(LiftoffRegister dst,LiftoffRegister src)3499 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
3500 LiftoffRegister src) {
3501 Sqrtps(dst.fp(), src.fp());
3502 }
3503
emit_f32x4_ceil(LiftoffRegister dst,LiftoffRegister src)3504 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
3505 LiftoffRegister src) {
3506 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3507 Roundps(dst.fp(), src.fp(), kRoundUp);
3508 return true;
3509 }
3510
emit_f32x4_floor(LiftoffRegister dst,LiftoffRegister src)3511 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
3512 LiftoffRegister src) {
3513 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3514 Roundps(dst.fp(), src.fp(), kRoundDown);
3515 return true;
3516 }
3517
emit_f32x4_trunc(LiftoffRegister dst,LiftoffRegister src)3518 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
3519 LiftoffRegister src) {
3520 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3521 Roundps(dst.fp(), src.fp(), kRoundToZero);
3522 return true;
3523 }
3524
emit_f32x4_nearest_int(LiftoffRegister dst,LiftoffRegister src)3525 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
3526 LiftoffRegister src) {
3527 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3528 Roundps(dst.fp(), src.fp(), kRoundToNearest);
3529 return true;
3530 }
3531
emit_f32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3532 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3533 LiftoffRegister rhs) {
3534 liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
3535 this, dst, lhs, rhs);
3536 }
3537
emit_f32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3538 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3539 LiftoffRegister rhs) {
3540 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
3541 this, dst, lhs, rhs);
3542 }
3543
emit_f32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3544 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3545 LiftoffRegister rhs) {
3546 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
3547 this, dst, lhs, rhs);
3548 }
3549
emit_f32x4_div(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3550 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
3551 LiftoffRegister rhs) {
3552 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
3553 this, dst, lhs, rhs);
3554 }
3555
emit_f32x4_min(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3556 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
3557 LiftoffRegister rhs) {
3558 F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3559 }
3560
emit_f32x4_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3561 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
3562 LiftoffRegister rhs) {
3563 F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3564 }
3565
emit_f32x4_pmin(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3566 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
3567 LiftoffRegister rhs) {
3568 // Due to the way minps works, pmin(a, b) = minps(b, a).
3569 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
3570 this, dst, rhs, lhs);
3571 }
3572
emit_f32x4_pmax(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3573 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
3574 LiftoffRegister rhs) {
3575 // Due to the way maxps works, pmax(a, b) = maxps(b, a).
3576 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
3577 this, dst, rhs, lhs);
3578 }
3579
emit_f64x2_abs(LiftoffRegister dst,LiftoffRegister src)3580 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
3581 LiftoffRegister src) {
3582 Abspd(dst.fp(), src.fp(), kScratchRegister);
3583 }
3584
emit_f64x2_neg(LiftoffRegister dst,LiftoffRegister src)3585 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
3586 LiftoffRegister src) {
3587 Negpd(dst.fp(), src.fp(), kScratchRegister);
3588 }
3589
emit_f64x2_sqrt(LiftoffRegister dst,LiftoffRegister src)3590 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
3591 LiftoffRegister src) {
3592 Sqrtpd(dst.fp(), src.fp());
3593 }
3594
emit_f64x2_ceil(LiftoffRegister dst,LiftoffRegister src)3595 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
3596 LiftoffRegister src) {
3597 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3598 Roundpd(dst.fp(), src.fp(), kRoundUp);
3599 return true;
3600 }
3601
emit_f64x2_floor(LiftoffRegister dst,LiftoffRegister src)3602 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
3603 LiftoffRegister src) {
3604 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3605 Roundpd(dst.fp(), src.fp(), kRoundDown);
3606 return true;
3607 }
3608
emit_f64x2_trunc(LiftoffRegister dst,LiftoffRegister src)3609 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
3610 LiftoffRegister src) {
3611 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3612 Roundpd(dst.fp(), src.fp(), kRoundToZero);
3613 return true;
3614 }
3615
emit_f64x2_nearest_int(LiftoffRegister dst,LiftoffRegister src)3616 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
3617 LiftoffRegister src) {
3618 DCHECK(CpuFeatures::IsSupported(SSE4_1));
3619 Roundpd(dst.fp(), src.fp(), kRoundToNearest);
3620 return true;
3621 }
3622
emit_f64x2_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3623 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
3624 LiftoffRegister rhs) {
3625 liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
3626 this, dst, lhs, rhs);
3627 }
3628
emit_f64x2_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3629 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
3630 LiftoffRegister rhs) {
3631 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
3632 this, dst, lhs, rhs);
3633 }
3634
emit_f64x2_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3635 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
3636 LiftoffRegister rhs) {
3637 liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
3638 this, dst, lhs, rhs);
3639 }
3640
emit_f64x2_div(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3641 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
3642 LiftoffRegister rhs) {
3643 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
3644 this, dst, lhs, rhs);
3645 }
3646
emit_f64x2_min(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3647 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
3648 LiftoffRegister rhs) {
3649 F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3650 }
3651
emit_f64x2_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3652 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
3653 LiftoffRegister rhs) {
3654 F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3655 }
3656
emit_f64x2_pmin(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3657 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
3658 LiftoffRegister rhs) {
3659 // Due to the way minpd works, pmin(a, b) = minpd(b, a).
3660 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
3661 this, dst, rhs, lhs);
3662 }
3663
emit_f64x2_pmax(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3664 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
3665 LiftoffRegister rhs) {
3666 // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
3667 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
3668 this, dst, rhs, lhs);
3669 }
3670
emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,LiftoffRegister src)3671 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
3672 LiftoffRegister src) {
3673 Cvtdq2pd(dst.fp(), src.fp());
3674 }
3675
emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,LiftoffRegister src)3676 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
3677 LiftoffRegister src) {
3678 F64x2ConvertLowI32x4U(dst.fp(), src.fp(), kScratchRegister);
3679 }
3680
emit_f64x2_promote_low_f32x4(LiftoffRegister dst,LiftoffRegister src)3681 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
3682 LiftoffRegister src) {
3683 Cvtps2pd(dst.fp(), src.fp());
3684 }
3685
emit_i32x4_sconvert_f32x4(LiftoffRegister dst,LiftoffRegister src)3686 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
3687 LiftoffRegister src) {
3688 I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, kScratchRegister);
3689 }
3690
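// Unsigned conversion is built on the signed Cvttps2dq: NaN and negative
// lanes are clamped to zero first, and lanes beyond the signed range are
// fixed up by converting (src - max_signed) separately and adding it back.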
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  // NaN->0, negative->0.
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmaxps(dst.fp(), src.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    maxps(dst.fp(), kScratchDoubleReg);
  }
  // scratch: float representation of max_signed.
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Psrld(kScratchDoubleReg, uint8_t{1});            // 0x7fffffff
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
  // scratch2: convert (src-max_signed).
  // Set positive overflow lanes to 0x7FFFFFFF.
  // Set negative lanes to 0.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubps(liftoff::kScratchDoubleReg2, dst.fp(), kScratchDoubleReg);
  } else {
    movaps(liftoff::kScratchDoubleReg2, dst.fp());
    subps(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  }
  Cmpleps(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
  Cvttps2dq(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2);
  Pxor(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  Pmaxsd(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  // Convert to int. Overflow lanes above max_signed will be 0x80000000.
  Cvttps2dq(dst.fp(), dst.fp());
  // Add (src-max_signed) for overflow lanes.
  Paddd(dst.fp(), liftoff::kScratchDoubleReg2);
}
3726
emit_f32x4_sconvert_i32x4(LiftoffRegister dst,LiftoffRegister src)3727 void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
3728 LiftoffRegister src) {
3729 Cvtdq2ps(dst.fp(), src.fp());
3730 }
3731
void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(kScratchDoubleReg, kScratchDoubleReg);           // Zeros.
  Pblendw(kScratchDoubleReg, src.fp(), uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), kScratchDoubleReg);
  }
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), byte{1});            // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());        // Convert hi, exactly.
  Addps(dst.fp(), dst.fp());           // Double hi, exactly.
  Addps(dst.fp(), kScratchDoubleReg);  // Add hi and lo, may round.
}
3749
emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,LiftoffRegister src)3750 void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
3751 LiftoffRegister src) {
3752 Cvtpd2ps(dst.fp(), src.fp());
3753 }
3754
emit_i8x16_sconvert_i16x8(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3755 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
3756 LiftoffRegister lhs,
3757 LiftoffRegister rhs) {
3758 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
3759 &Assembler::packsswb>(this, dst, lhs,
3760 rhs);
3761 }
3762
emit_i8x16_uconvert_i16x8(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3763 void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
3764 LiftoffRegister lhs,
3765 LiftoffRegister rhs) {
3766 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
3767 &Assembler::packuswb>(this, dst, lhs,
3768 rhs);
3769 }
3770
emit_i16x8_sconvert_i32x4(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3771 void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
3772 LiftoffRegister lhs,
3773 LiftoffRegister rhs) {
3774 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
3775 &Assembler::packssdw>(this, dst, lhs,
3776 rhs);
3777 }
3778
emit_i16x8_uconvert_i32x4(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3779 void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
3780 LiftoffRegister lhs,
3781 LiftoffRegister rhs) {
3782 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
3783 &Assembler::packusdw>(this, dst, lhs,
3784 rhs, SSE4_1);
3785 }
3786
emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,LiftoffRegister src)3787 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
3788 LiftoffRegister src) {
3789 Pmovsxbw(dst.fp(), src.fp());
3790 }
3791
emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,LiftoffRegister src)3792 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
3793 LiftoffRegister src) {
3794 I16x8SConvertI8x16High(dst.fp(), src.fp());
3795 }
3796
emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,LiftoffRegister src)3797 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
3798 LiftoffRegister src) {
3799 Pmovzxbw(dst.fp(), src.fp());
3800 }
3801
emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,LiftoffRegister src)3802 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
3803 LiftoffRegister src) {
3804 I16x8UConvertI8x16High(dst.fp(), src.fp(), kScratchDoubleReg);
3805 }
3806
emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,LiftoffRegister src)3807 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
3808 LiftoffRegister src) {
3809 Pmovsxwd(dst.fp(), src.fp());
3810 }
3811
emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,LiftoffRegister src)3812 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
3813 LiftoffRegister src) {
3814 I32x4SConvertI16x8High(dst.fp(), src.fp());
3815 }
3816
emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,LiftoffRegister src)3817 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
3818 LiftoffRegister src) {
3819 Pmovzxwd(dst.fp(), src.fp());
3820 }
3821
emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,LiftoffRegister src)3822 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
3823 LiftoffRegister src) {
3824 I32x4UConvertI16x8High(dst.fp(), src.fp(), kScratchDoubleReg);
3825 }
3826
emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,LiftoffRegister src)3827 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
3828 LiftoffRegister src) {
3829 I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), kScratchDoubleReg,
3830 kScratchRegister);
3831 }
3832
emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,LiftoffRegister src)3833 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
3834 LiftoffRegister src) {
3835 I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), kScratchDoubleReg,
3836 kScratchRegister);
3837 }
3838
emit_s128_and_not(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3839 void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
3840 LiftoffRegister lhs,
3841 LiftoffRegister rhs) {
3842 liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
3843 this, dst, rhs, lhs);
3844 }
3845
emit_i8x16_rounding_average_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3846 void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
3847 LiftoffRegister lhs,
3848 LiftoffRegister rhs) {
3849 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
3850 this, dst, lhs, rhs);
3851 }
3852
emit_i16x8_rounding_average_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3853 void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
3854 LiftoffRegister lhs,
3855 LiftoffRegister rhs) {
3856 liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
3857 this, dst, lhs, rhs);
3858 }
3859
emit_i8x16_abs(LiftoffRegister dst,LiftoffRegister src)3860 void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
3861 LiftoffRegister src) {
3862 Pabsb(dst.fp(), src.fp());
3863 }
3864
emit_i16x8_abs(LiftoffRegister dst,LiftoffRegister src)3865 void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
3866 LiftoffRegister src) {
3867 Pabsw(dst.fp(), src.fp());
3868 }
3869
emit_i32x4_abs(LiftoffRegister dst,LiftoffRegister src)3870 void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
3871 LiftoffRegister src) {
3872 Pabsd(dst.fp(), src.fp());
3873 }
3874
emit_i64x2_abs(LiftoffRegister dst,LiftoffRegister src)3875 void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
3876 LiftoffRegister src) {
3877 I64x2Abs(dst.fp(), src.fp(), kScratchDoubleReg);
3878 }
3879
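// Pextrb zero-extends the extracted byte, so the signed variant needs an
// explicit sign extension afterwards.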
void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxbl(dst.gp(), dst.gp());
}
3886
emit_i8x16_extract_lane_u(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3887 void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
3888 LiftoffRegister lhs,
3889 uint8_t imm_lane_idx) {
3890 Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
3891 }
3892
emit_i16x8_extract_lane_s(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3893 void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
3894 LiftoffRegister lhs,
3895 uint8_t imm_lane_idx) {
3896 Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
3897 movsxwl(dst.gp(), dst.gp());
3898 }
3899
emit_i16x8_extract_lane_u(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3900 void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
3901 LiftoffRegister lhs,
3902 uint8_t imm_lane_idx) {
3903 Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
3904 }
3905
emit_i32x4_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3906 void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
3907 LiftoffRegister lhs,
3908 uint8_t imm_lane_idx) {
3909 Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
3910 }
3911
emit_i64x2_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3912 void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
3913 LiftoffRegister lhs,
3914 uint8_t imm_lane_idx) {
3915 Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
3916 }
3917
emit_f32x4_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3918 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
3919 LiftoffRegister lhs,
3920 uint8_t imm_lane_idx) {
3921 F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
3922 }
3923
emit_f64x2_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)3924 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
3925 LiftoffRegister lhs,
3926 uint8_t imm_lane_idx) {
3927 F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
3928 }
3929
emit_i8x16_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3930 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
3931 LiftoffRegister src1,
3932 LiftoffRegister src2,
3933 uint8_t imm_lane_idx) {
3934 if (CpuFeatures::IsSupported(AVX)) {
3935 CpuFeatureScope scope(this, AVX);
3936 vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3937 } else {
3938 CpuFeatureScope scope(this, SSE4_1);
3939 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3940 pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
3941 }
3942 }
3943
emit_i16x8_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3944 void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
3945 LiftoffRegister src1,
3946 LiftoffRegister src2,
3947 uint8_t imm_lane_idx) {
3948 if (CpuFeatures::IsSupported(AVX)) {
3949 CpuFeatureScope scope(this, AVX);
3950 vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3951 } else {
3952 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3953 pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
3954 }
3955 }
3956
emit_i32x4_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3957 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
3958 LiftoffRegister src1,
3959 LiftoffRegister src2,
3960 uint8_t imm_lane_idx) {
3961 if (CpuFeatures::IsSupported(AVX)) {
3962 CpuFeatureScope scope(this, AVX);
3963 vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3964 } else {
3965 CpuFeatureScope scope(this, SSE4_1);
3966 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3967 pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
3968 }
3969 }
3970
emit_i64x2_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3971 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
3972 LiftoffRegister src1,
3973 LiftoffRegister src2,
3974 uint8_t imm_lane_idx) {
3975 if (CpuFeatures::IsSupported(AVX)) {
3976 CpuFeatureScope scope(this, AVX);
3977 vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
3978 } else {
3979 CpuFeatureScope scope(this, SSE4_1);
3980 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3981 pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
3982 }
3983 }
3984
emit_f32x4_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3985 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
3986 LiftoffRegister src1,
3987 LiftoffRegister src2,
3988 uint8_t imm_lane_idx) {
3989 if (CpuFeatures::IsSupported(AVX)) {
3990 CpuFeatureScope scope(this, AVX);
3991 vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
3992 } else {
3993 CpuFeatureScope scope(this, SSE4_1);
3994 if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
3995 insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
3996 }
3997 }
3998
emit_f64x2_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)3999 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
4000 LiftoffRegister src1,
4001 LiftoffRegister src2,
4002 uint8_t imm_lane_idx) {
4003 F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
4004 }
4005
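// Compare rsp against the stack limit stored at limit_address and branch to
// the out-of-line stack overflow code if the limit has been reached.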
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmpq(rsp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}
4010
CallTrapCallbackForTesting()4011 void LiftoffAssembler::CallTrapCallbackForTesting() {
4012 PrepareCallCFunction(0);
4013 CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
4014 }
4015
AssertUnreachable(AbortReason reason)4016 void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
4017 TurboAssembler::AssertUnreachable(reason);
4018 }
4019
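// GP registers are pushed one by one; FP registers are saved into a single
// block of stack space, one 16-byte SIMD slot per register.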
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    pushq(reg.gp());
    gp_regs.clear(reg);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(rsp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}
4041
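// Restore in the reverse order of PushRegisters: reload the FP registers from
// the stack block first, then pop the GP registers from last to first.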
void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(rsp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) addq(rsp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    popq(reg.gp());
    gp_regs.clear(reg);
  }
}
4059
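// Each spilled register occupies one pointer-sized slot; slots holding tagged
// references are recorded in the safepoint so the GC can visit them.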
void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  int spill_space_size = 0;
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}
4076
DropStackSlotsAndRet(uint32_t num_stack_slots)4077 void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
4078 DCHECK_LT(num_stack_slots,
4079 (1 << 16) / kSystemPointerSize); // 16 bit immediate
4080 ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
4081 }
4082
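// C calls pass all wasm arguments through a stack buffer: the arguments are
// spilled below rsp, a pointer to that buffer is the single C argument, and
// the (at most one) register result plus any out-argument are read back
// after the call.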
void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, Operand(rsp, arg_bytes), *args++, param_kind);
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  movq(arg_reg_1, rsp);

  constexpr int kNumCCallArgs = 1;

  // Now call the C function.
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = rax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, Operand(rsp, 0), out_argument_kind);
  }

  addq(rsp, Immediate(stack_bytes));
}
4124
CallNativeWasmCode(Address addr)4125 void LiftoffAssembler::CallNativeWasmCode(Address addr) {
4126 near_call(addr, RelocInfo::WASM_CALL);
4127 }
4128
TailCallNativeWasmCode(Address addr)4129 void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
4130 near_jmp(addr, RelocInfo::WASM_CALL);
4131 }
4132
CallIndirect(const ValueKindSig * sig,compiler::CallDescriptor * call_descriptor,Register target)4133 void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
4134 compiler::CallDescriptor* call_descriptor,
4135 Register target) {
4136 if (target == no_reg) {
4137 popq(kScratchRegister);
4138 target = kScratchRegister;
4139 }
4140 call(target);
4141 }
4142
TailCallIndirect(Register target)4143 void LiftoffAssembler::TailCallIndirect(Register target) {
4144 if (target == no_reg) {
4145 popq(kScratchRegister);
4146 target = kScratchRegister;
4147 }
4148 jmp(target);
4149 }
4150
CallRuntimeStub(WasmCode::RuntimeStubId sid)4151 void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
4152 // A direct call to a wasm runtime stub defined in this module.
4153 // Just encode the stub index. This will be patched at relocation.
4154 near_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
4155 }
4156
AllocateStackSlot(Register addr,uint32_t size)4157 void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
4158 AllocateStackSpace(size);
4159 movq(addr, rsp);
4160 }
4161
DeallocateStackSlot(uint32_t size)4162 void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
4163 addq(rsp, Immediate(size));
4164 }
4165
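// If the OSR target slot is non-zero, branch to the on-stack-replacement
// runtime stub; otherwise fall through.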
void LiftoffAssembler::MaybeOSR() {
  cmpq(liftoff::GetOSRTargetSlot(), Immediate(0));
  j(not_equal, static_cast<Address>(WasmCode::kWasmOnStackReplace),
    RelocInfo::WASM_STUB_CALL);
}
4171
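// Comparing src with itself sets the parity flag exactly when src is NaN, so
// the store of 1 to *dst is skipped for all ordinary values.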
void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
  if (kind == kF32) {
    Ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    Ucomisd(src, src);
  }
  Label ret;
  j(parity_odd, &ret);
  movl(Operand(dst, 0), Immediate(1));
  bind(&ret);
}
4185
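// cmpunordps/cmpunordpd sets all bits of a lane iff that lane is NaN.
// pmovmskb collapses the lane mask into tmp_gp, which is then OR'ed into the
// flag at *dst, so the flag becomes non-zero if any lane was NaN.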
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  pmovmskb(tmp_gp, tmp_s128.fp());
  orl(Operand(dst, 0), tmp_gp);
}
4201
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    DCHECK_LT(0, stack_decrement);
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        if (src.kind() == kI32) {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // Load i32 values to a register first to ensure they are zero
          // extended.
          asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
          asm_->pushq(kScratchRegister);
        } else if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
          // Since offsets are subtracted from sp, we need a smaller offset to
          // push the top of an s128 value.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        } else {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // For all other types, just push the whole (8-byte) stack slot.
          // This is also ok for f32 values (even though we copy 4
          // uninitialized bytes), because f32 and f64 values are clearly
          // distinguished in Turbofan, so the uninitialized bytes are never
          // accessed.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        }
        break;
      case LiftoffAssembler::VarState::kRegister: {
        int pushed = src.kind() == kS128 ? kSimd128Size : kSystemPointerSize;
        liftoff::push(asm_, src.reg(), src.kind(), stack_decrement - pushed);
        break;
      }
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->pushq(Immediate(src.i32_const()));
        break;
    }
  }
}
4247
#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_