// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_
#define V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/machine-type.h"
#include "src/codegen/x64/register-x64.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/simd-shuffle.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}

constexpr Register kScratchRegister2 = r11;
static_assert(kScratchRegister != kScratchRegister2, "collision");
static_assert((kLiftoffAssemblerGpCacheRegs &
               RegList{kScratchRegister, kScratchRegister2})
                  .is_empty(),
              "scratch registers must not be used as cache registers");

constexpr DoubleRegister kScratchDoubleReg2 = xmm14;
static_assert(kScratchDoubleReg != kScratchDoubleReg2, "collision");
static_assert((kLiftoffAssemblerFpCacheRegs &
               DoubleRegList{kScratchDoubleReg, kScratchDoubleReg2})
                  .is_empty(),
              "scratch registers must not be used as cache registers");

// rbp-8 holds the stack marker, rbp-16 is the instance parameter.
constexpr int kInstanceOffset = 16;
constexpr int kFeedbackVectorOffset = 24;  // rbp-24 is the feedback vector.
constexpr int kTierupBudgetOffset = 32;    // rbp-32 is the tierup budget.

inline Operand GetStackSlot(int offset) { return Operand(rbp, -offset); }

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

inline Operand GetOSRTargetSlot() { return GetStackSlot(kOSRTargetOffset); }

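// Computes the memory operand {addr + offset + offset_imm}. If the immediate
// does not fit in 31 bits, it is first materialized in {kScratchRegister},
// which is clobbered in that case.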
inline Operand GetMemOp(LiftoffAssembler* assm, Register addr, Register offset,
                        uintptr_t offset_imm) {
  if (is_uint31(offset_imm)) {
    int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
    return offset == no_reg ? Operand(addr, offset_imm32)
                            : Operand(addr, offset, times_1, offset_imm32);
  }
  // Offset immediate does not fit in 31 bits.
  Register scratch = kScratchRegister;
  assm->TurboAssembler::Move(scratch, offset_imm);
  if (offset != no_reg) assm->addq(scratch, offset);
  return Operand(addr, scratch, times_1, 0);
}

inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Operand src,
                 ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst.gp(), src);
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->movq(dst.gp(), src);
      break;
    case kF32:
      assm->Movss(dst.fp(), src);
      break;
    case kF64:
      assm->Movsd(dst.fp(), src);
      break;
    case kS128:
      assm->Movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}

inline void Store(LiftoffAssembler* assm, Operand dst, LiftoffRegister src,
                  ValueKind kind) {
  switch (kind) {
    case kI32:
      assm->movl(dst, src.gp());
      break;
    case kI64:
      assm->movq(dst, src.gp());
      break;
    case kOptRef:
    case kRef:
    case kRtt:
      assm->StoreTaggedField(dst, src.gp());
      break;
    case kF32:
      assm->Movss(dst, src.fp());
      break;
    case kF64:
      assm->Movsd(dst, src.fp());
      break;
    case kS128:
      assm->Movdqu(dst, src.fp());
      break;
    default:
      UNREACHABLE();
  }
}

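// Pushes {reg} onto the stack; {padding} extra bytes of stack space are
// allocated above the pushed value.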
inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kI64:
    case kRef:
    case kOptRef:
      assm->AllocateStackSpace(padding);
      assm->pushq(reg.gp());
      break;
    case kF32:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movss(Operand(rsp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(kSystemPointerSize + padding);
      assm->Movsd(Operand(rsp, 0), reg.fp());
      break;
    case kS128:
      assm->AllocateStackSpace(kSystemPointerSize * 2 + padding);
      assm->Movdqu(Operand(rsp, 0), reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

constexpr int kSubSpSize = 7;  // 7 bytes for "subq rsp, <imm32>"

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be, so we just emit a placeholder
  // instruction. PatchPrepareStackFrame will patch this in order to increase
  // the stack appropriately.
  sub_sp_32(0);
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  pushq(Operand(rbp, 8));
  pushq(Operand(rbp, 0));

  // Shift the whole frame upwards.
  const int slot_count = num_callee_stack_params + 2;
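  // The two extra slots hold the return address and frame pointer that were
  // just pushed above.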
  for (int i = slot_count - 1; i >= 0; --i) {
    movq(kScratchRegister, Operand(rsp, i * 8));
    movq(Operand(rbp, (i - stack_param_delta) * 8), kScratchRegister);
  }

  // Set the new stack and frame pointer.
  leaq(rsp, Operand(rbp, -stack_param_delta * 8));
  popq(rbp);
}

void LiftoffAssembler::AlignFrameSize() {
  max_used_spill_offset_ = RoundUp(max_used_spill_offset_, kSystemPointerSize);
}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We can't run out of space when patching; just pass anything big enough to
  // not cause the assembler to try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
  // "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    movq(kScratchRegister,
         FieldOperand(kWasmInstanceRegister,
                      WasmInstanceObject::kRealStackLimitAddressOffset));
    movq(kScratchRegister, Operand(kScratchRegister, 0));
    addq(kScratchRegister, Immediate(frame_size));
    cmpq(rsp, kScratchRegister);
    j(above_equal, &continuation, Label::kNear);
  }

  near_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}

void LiftoffAssembler::FinishCode() {}

void LiftoffAssembler::AbortCompilation() {}

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return kOSRTargetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  return value_kind_full_size(kind);
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      if (value.to_i32() == 0 && RelocInfo::IsNoInfo(rmode)) {
        xorl(reg.gp(), reg.gp());
      } else {
        movl(reg.gp(), Immediate(value.to_i32(), rmode));
      }
      break;
    case kI64:
      if (RelocInfo::IsNoInfo(rmode)) {
        TurboAssembler::Move(reg.gp(), value.to_i64());
      } else {
        movq(reg.gp(), Immediate64(value.to_i64(), rmode));
      }
      break;
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  movq(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzxbl(dst, src);
      break;
    case 4:
      movl(dst, src);
      break;
    case 8:
      movq(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  DCHECK_LE(0, offset);
  LoadTaggedPointerField(dst, Operand(instance, offset));
}

void LiftoffAssembler::LoadExternalPointer(Register dst, Register instance,
                                           int offset, ExternalPointerTag tag,
                                           Register isolate_root) {
  LoadExternalPointerField(dst, FieldOperand(instance, offset), tag,
                           isolate_root,
                           IsolateRootLocation::kInScratchRegister);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  movq(liftoff::GetInstanceOperand(), instance);
}

void LiftoffAssembler::ResetOSRTarget() {
  movq(liftoff::GetOSRTargetSlot(), Immediate(0));
}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  if (FLAG_debug_code && offset_reg != no_reg) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  LoadTaggedPointerField(dst, src_op);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  Operand src_op = liftoff::GetMemOp(this, src_addr, no_reg,
                                     static_cast<uint32_t>(offset_imm));
  movq(dst, src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg,
                                     static_cast<uint32_t>(offset_imm));
  StoreTaggedField(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;
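
  // Write barrier: the record-write stub is only called if the destination
  // page is flagged as containing interesting pointers, the stored value is a
  // heap object (not a Smi), and the value's page is flagged as interesting.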
409 
410   Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
411   Label write_barrier;
412   Label exit;
413   CheckPageFlag(dst_addr, scratch,
414                 MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
415                 &write_barrier, Label::kNear);
416   jmp(&exit, Label::kNear);
417   bind(&write_barrier);
418   JumpIfSmi(src.gp(), &exit, Label::kNear);
419   if (COMPRESS_POINTERS_BOOL) {
420     DecompressTaggedPointer(src.gp(), src.gp());
421   }
422   CheckPageFlag(src.gp(), scratch,
423                 MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
424                 Label::kNear);
425   leaq(scratch, dst_op);
426 
427   CallRecordWriteStubSaveRegisters(
428       dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
429       StubCallMode::kCallWasmRuntimeStub);
430   bind(&exit);
431 }
432 
AtomicLoad(LiftoffRegister dst,Register src_addr,Register offset_reg,uintptr_t offset_imm,LoadType type,LiftoffRegList pinned)433 void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
434                                   Register offset_reg, uintptr_t offset_imm,
435                                   LoadType type, LiftoffRegList pinned) {
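  // On x64, a plain load already provides the acquire semantics needed for a
  // wasm atomic load, so this simply forwards to the regular {Load}.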
  Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uintptr_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  if (offset_reg != no_reg && !i64_offset) {
    AssertZeroExtended(offset_reg);
  }
  Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();
  switch (type.value()) {
    case LoadType::kI32Load8U:
    case LoadType::kI64Load8U:
      movzxbl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsxbl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8S:
      movsxbq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16U:
    case LoadType::kI64Load16U:
      movzxwl(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsxwl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16S:
      movsxwq(dst.gp(), src_op);
      break;
    case LoadType::kI32Load:
    case LoadType::kI64Load32U:
      movl(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32S:
      movsxlq(dst.gp(), src_op);
      break;
    case LoadType::kI64Load:
      movq(dst.gp(), src_op);
      break;
    case LoadType::kF32Load:
      Movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      Movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      Movdqu(dst.fp(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uintptr_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList /* pinned */,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      movb(dst_op, src.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      movw(dst_op, src.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      movl(dst_op, src.gp());
      break;
    case StoreType::kI64Store:
      movq(dst_op, src.gp());
      break;
    case StoreType::kF32Store:
      Movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      Movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uintptr_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  Register src_reg = src.gp();
  if (cache_state()->is_used(src)) {
    movq(kScratchRegister, src_reg);
    src_reg = kScratchRegister;
  }
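  // {xchg} with a memory operand carries an implicit lock prefix, so it gives
  // the sequentially consistent store required for wasm atomics without an
  // explicit {lock}.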
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(src_reg, dst_op);
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(src_reg, dst_op);
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(src_reg, dst_op);
      break;
    case StoreType::kI64Store:
      xchgq(src_reg, dst_op);
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  LiftoffRegList dont_overwrite =
      cache_state()->used_registers | LiftoffRegList{dst_addr, offset_reg};
  DCHECK(!dont_overwrite.has(result));
  if (dont_overwrite.has(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      negb(value.gp());
      lock();
      xaddb(dst_op, value.gp());
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      negw(value.gp());
      lock();
      xaddw(dst_op, value.gp());
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      negl(value.gp());
      lock();
      xaddl(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      negq(value.gp());
      lock();
      xaddq(dst_op, value.gp());
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicBinop(LiftoffAssembler* lasm,
                        void (Assembler::*opl)(Register, Register),
                        void (Assembler::*opq)(Register, Register),
                        Register dst_addr, Register offset_reg,
                        uintptr_t offset_imm, LiftoffRegister value,
                        LiftoffRegister result, StoreType type) {
  DCHECK(!__ cache_state()->is_used(result));
  Register value_reg = value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned = LiftoffRegList{dst_addr, offset_reg, value_reg};
  __ ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  Operand dst_op = liftoff::GetMemOp(lasm, dst_addr, offset_reg, offset_imm);

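  // Each case below is a compare-and-swap loop: load the old value into rax,
  // apply the operation into the scratch register, then cmpxchg; if the memory
  // location was modified concurrently, retry.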
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      Label binop;
      __ xorq(rax, rax);
      __ movb(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgb(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      Label binop;
      __ xorq(rax, rax);
      __ movw(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgw(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      Label binop;
      __ movl(rax, dst_op);
      __ bind(&binop);
      __ movl(kScratchRegister, rax);
      (lasm->*opl)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgl(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    case StoreType::kI64Store: {
      Label binop;
      __ movq(rax, dst_op);
      __ bind(&binop);
      __ movq(kScratchRegister, rax);
      (lasm->*opq)(kScratchRegister, value_reg);
      __ lock();
      __ cmpxchgq(dst_op, kScratchRegister);
      __ j(not_equal, &binop);
      break;
    }
    default:
      UNREACHABLE();
  }

  if (result.gp() != rax) {
    __ movq(result.gp(), rax);
  }
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::andl, &Assembler::andq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uintptr_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::orl, &Assembler::orq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uintptr_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  liftoff::AtomicBinop(this, &Assembler::xorl, &Assembler::xorq, dst_addr,
                       offset_reg, offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uintptr_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  DCHECK(!cache_state()->is_used(result));
  if (cache_state()->is_used(value)) {
    // We cannot overwrite {value}, but the {value} register is changed in the
    // code we generate. Therefore we copy {value} to {result} and use the
    // {result} register in the code below.
    movq(result.gp(), value.gp());
    value = result;
  }
  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8:
      xchgb(value.gp(), dst_op);
      movzxbq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store16:
    case StoreType::kI64Store16:
      xchgw(value.gp(), dst_op);
      movzxwq(result.gp(), value.gp());
      break;
    case StoreType::kI32Store:
    case StoreType::kI64Store32:
      xchgl(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    case StoreType::kI64Store:
      xchgq(value.gp(), dst_op);
      if (value != result) {
        movq(result.gp(), value.gp());
      }
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uintptr_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  Register value_reg = new_value.gp();
  // The cmpxchg instruction uses rax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  LiftoffRegList pinned =
      LiftoffRegList{dst_addr, offset_reg, expected, value_reg};
  ClearRegister(rax, {&dst_addr, &offset_reg, &value_reg}, pinned);
  if (expected.gp() != rax) {
    movq(rax, expected.gp());
  }

  Operand dst_op = liftoff::GetMemOp(this, dst_addr, offset_reg, offset_imm);

  lock();
  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      cmpxchgb(dst_op, value_reg);
      movzxbq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      cmpxchgw(dst_op, value_reg);
      movzxwq(result.gp(), rax);
      break;
    }
    case StoreType::kI32Store: {
      cmpxchgl(dst_op, value_reg);
      if (result.gp() != rax) {
        movl(result.gp(), rax);
      }
      break;
    }
    case StoreType::kI64Store32: {
      cmpxchgl(dst_op, value_reg);
      // Zero extension.
      movl(result.gp(), rax);
      break;
    }
    case StoreType::kI64Store: {
      cmpxchgq(dst_op, value_reg);
      if (result.gp() != rax) {
        movq(result.gp(), rax);
      }
      break;
    }
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::AtomicFence() { mfence(); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  Operand src(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Load(this, dst, src, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  Operand dst(rbp, kSystemPointerSize * (caller_slot_idx + 1));
  liftoff::Store(this, dst, src, kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  Operand src(rsp, offset);
  liftoff::Load(this, reg, src, kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_NE(dst_offset, src_offset);
  Operand dst = liftoff::GetStackSlot(dst_offset);
  Operand src = liftoff::GetStackSlot(src_offset);
  switch (SlotSizeForType(kind)) {
    case 4:
      movl(kScratchRegister, src);
      movl(dst, kScratchRegister);
      break;
    case 8:
      movq(kScratchRegister, src);
      movq(dst, kScratchRegister);
      break;
    case 16:
      Movdqu(kScratchDoubleReg, src);
      Movdqu(dst, kScratchDoubleReg);
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kI32) {
    movl(dst, src);
  } else {
    DCHECK(kI64 == kind || is_reference(kind));
    movq(dst, src);
  }
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    Movss(dst, src);
  } else if (kind == kF64) {
    Movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movapd(dst, src);
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
      movl(dst, reg.gp());
      break;
    case kI64:
    case kOptRef:
    case kRef:
    case kRtt:
      movq(dst, reg.gp());
      break;
    case kF32:
      Movss(dst, reg.fp());
      break;
    case kF64:
      Movsd(dst, reg.fp());
      break;
    case kS128:
      Movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      movl(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      if (is_int32(value.to_i64())) {
        // Sign extend low word.
        movq(dst, Immediate(static_cast<int32_t>(value.to_i64())));
      } else if (is_uint32(value.to_i64())) {
        // Zero extend low word.
        movl(kScratchRegister, Immediate(static_cast<int32_t>(value.to_i64())));
        movq(dst, kScratchRegister);
      } else {
        movq(kScratchRegister, value.to_i64());
        movq(dst, kScratchRegister);
      }
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
}

void LiftoffAssembler::FillI64Half(Register, int offset, RegPairHalf) {
  UNREACHABLE();
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  RecordUsedSpillOffset(start + size);

  if (size <= 3 * kStackSlotSize) {
    // Special straight-line code for up to three slots
    // (7-10 bytes per slot: REX C7 <1-4 bytes op> <4 bytes imm>),
    // and a movl (6-9 bytes) when size % 8 != 0.
    uint32_t remainder = size;
    for (; remainder >= kStackSlotSize; remainder -= kStackSlotSize) {
      movq(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
    DCHECK(remainder == 4 || remainder == 0);
    if (remainder) {
      movl(liftoff::GetStackSlot(start + remainder), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 4-7 for lea, 2 for xor, 5
    // for mov, 2 for repstosl, 3 for pops).
    pushq(rax);
    pushq(rcx);
    pushq(rdi);
    leaq(rdi, liftoff::GetStackSlot(start + size));
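    // {rdi} now points at the lowest address of the region to clear
    // (rbp - (start + size)); with the direction flag clear, as the ABI
    // guarantees, rep stosl fills doublewords upwards from there.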
    xorl(rax, rax);
    // Convert size (bytes) to doublewords (4-bytes).
    movl(rcx, Immediate(size / 4));
    repstosl();
    popq(rdi);
    popq(rcx);
    popq(rax);
  }
}

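// If {dst} differs from {lhs}, compute the sum directly into {dst} with lea
// instead of a mov followed by an add.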
void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, rhs, times_1, 0));
  } else {
    addl(dst, rhs);
  }
}

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
  if (lhs != dst) {
    leal(dst, Operand(lhs, imm));
  } else {
    addl(dst, Immediate(imm));
  }
}

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
  if (dst != rhs) {
    // Default path.
    if (dst != lhs) movl(dst, lhs);
    subl(dst, rhs);
  } else if (lhs == rhs) {
    // Degenerate case.
    xorl(dst, dst);
  } else {
    // Emit {dst = lhs + -rhs} if dst == rhs.
    negl(dst);
    addl(dst, lhs);
  }
}

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    leal(dst, Operand(lhs, -imm));
  } else {
    subl(dst, Immediate(imm));
  }
}

namespace liftoff {
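// Emits {dst = lhs <op> rhs} for a commutative operation. Commutativity is
// exploited so that no extra move is needed when {dst} aliases {rhs}.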
template <void (Assembler::*op)(Register, Register),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
                          Register rhs) {
  if (dst == rhs) {
    (assm->*op)(dst, lhs);
  } else {
    if (dst != lhs) (assm->*mov)(dst, lhs);
    (assm->*op)(dst, rhs);
  }
}

template <void (Assembler::*op)(Register, Immediate),
          void (Assembler::*mov)(Register, Register)>
void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
                             int32_t imm) {
  if (dst != lhs) (assm->*mov)(dst, lhs);
  (assm->*op)(dst, Immediate(imm));
}

}  // namespace liftoff

void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::imull, &Assembler::movl>(this, dst,
                                                                     lhs, rhs);
}

namespace liftoff {
enum class DivOrRem : uint8_t { kDiv, kRem };
template <typename type, DivOrRem div_or_rem>
void EmitIntDivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
                     Register rhs, Label* trap_div_by_zero,
                     Label* trap_div_unrepresentable) {
  constexpr bool needs_unrepresentable_check =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kDiv;
  constexpr bool special_case_minus_1 =
      std::is_signed<type>::value && div_or_rem == DivOrRem::kRem;
  DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);

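// {iop} expands to the 32-bit ("l") or 64-bit ("q") form of an instruction,
// depending on the width of {type}.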
#define iop(name, ...)            \
  do {                            \
    if (sizeof(type) == 4) {      \
      assm->name##l(__VA_ARGS__); \
    } else {                      \
      assm->name##q(__VA_ARGS__); \
    }                             \
  } while (false)

  // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
  // these registers are unused. If {rhs} is stored in one of them, move it to
  // another temporary register.
  // Do all this before any branch, such that the code is executed
  // unconditionally, as the cache state will also be modified unconditionally.
  assm->SpillRegisters(rdx, rax);
  if (rhs == rax || rhs == rdx) {
    iop(mov, kScratchRegister, rhs);
    rhs = kScratchRegister;
  }

  // Check for division by zero.
  iop(test, rhs, rhs);
  assm->j(zero, trap_div_by_zero);

  Label done;
  if (needs_unrepresentable_check) {
    // Check for {kMinInt / -1}. This is unrepresentable.
    Label do_div;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_div);
    // {lhs} is min int if {lhs - 1} overflows.
    iop(cmp, lhs, Immediate(1));
    assm->j(overflow, trap_div_unrepresentable);
    assm->bind(&do_div);
  } else if (special_case_minus_1) {
    // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
    // cannot be computed).
    Label do_rem;
    iop(cmp, rhs, Immediate(-1));
    assm->j(not_equal, &do_rem);
    // clang-format off
    // (conflicts with presubmit checks because it is confused about "xor")
    iop(xor, dst, dst);
    // clang-format on
    assm->jmp(&done);
    assm->bind(&do_rem);
  }

  // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
  // do the division.
  if (lhs != rax) iop(mov, rax, lhs);
  if (std::is_same<int32_t, type>::value) {  // i32
    assm->cdq();
    assm->idivl(rhs);
  } else if (std::is_same<uint32_t, type>::value) {  // u32
    assm->xorl(rdx, rdx);
    assm->divl(rhs);
  } else if (std::is_same<int64_t, type>::value) {  // i64
    assm->cqo();
    assm->idivq(rhs);
  } else {  // u64
    assm->xorq(rdx, rdx);
    assm->divq(rhs);
  }

  // Move back the result (in {eax} or {edx}) into the {dst} register.
  constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? rax : rdx;
  if (dst != kResultReg) {
    iop(mov, dst, kResultReg);
  }
  if (special_case_minus_1) assm->bind(&done);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kDiv>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<int32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  liftoff::EmitIntDivOrRem<uint32_t, liftoff::DivOrRem::kRem>(
      this, dst, lhs, rhs, trap_div_by_zero, nullptr);
}

void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::andl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::andl, &Assembler::movl>(
      this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                   lhs, rhs);
}

void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::orl, &Assembler::movl>(this, dst,
                                                                      lhs, imm);
}

void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
  liftoff::EmitCommutativeBinOp<&Assembler::xorl, &Assembler::movl>(this, dst,
                                                                    lhs, rhs);
}

void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
  liftoff::EmitCommutativeBinOpImm<&Assembler::xorl, &Assembler::movl>(
      this, dst, lhs, imm);
}

namespace liftoff {
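// Variable-count shifts on x64 take the shift count in {cl}, so the helper
// below routes the amount through {rcx}, saving {rcx} in the scratch register
// when it is live.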
template <ValueKind kind>
inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
                               Register src, Register amount,
                               void (Assembler::*emit_shift)(Register)) {
  // If dst is rcx, compute into the scratch register first, then move to rcx.
  if (dst == rcx) {
    assm->Move(kScratchRegister, src, kind);
    if (amount != rcx) assm->Move(rcx, amount, kind);
    (assm->*emit_shift)(kScratchRegister);
    assm->Move(rcx, kScratchRegister, kind);
    return;
  }

  // Move amount into rcx. If rcx is in use, move its content into the scratch
  // register. If src is rcx, src is now the scratch register.
  bool use_scratch = false;
  if (amount != rcx) {
    use_scratch =
        src == rcx || assm->cache_state()->is_used(LiftoffRegister(rcx));
    if (use_scratch) assm->movq(kScratchRegister, rcx);
    if (src == rcx) src = kScratchRegister;
    assm->Move(rcx, amount, kind);
  }

  // Do the actual shift.
  if (dst != src) assm->Move(dst, src, kind);
  (assm->*emit_shift)(dst);

  // Restore rcx if needed.
  if (use_scratch) assm->movq(rcx, kScratchRegister);
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shll_cl);
}

void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shll(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::sarl_cl);
}

void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  sarl(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
                                    Register amount) {
  liftoff::EmitShiftOperation<kI32>(this, dst, src, amount,
                                    &Assembler::shrl_cl);
}

void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
                                     int32_t amount) {
  if (dst != src) movl(dst, src);
  shrl(dst, Immediate(amount & 31));
}

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  Lzcntl(dst, src);
}

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  Tzcntl(dst, src);
}

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(POPCNT)) return false;
  CpuFeatureScope scope(this, POPCNT);
  popcntl(dst, src);
  return true;
}

void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  if (lhs.gp() != dst.gp()) {
    leaq(dst.gp(), Operand(lhs.gp(), rhs.gp(), times_1, 0));
  } else {
    addq(dst.gp(), rhs.gp());
  }
}

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int64_t imm) {
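  // addq/leaq only take sign-extended 32-bit immediates, so larger constants
  // are first materialized in the scratch register.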
1317   if (!is_int32(imm)) {
1318     TurboAssembler::Move(kScratchRegister, imm);
1319     if (lhs.gp() == dst.gp()) {
1320       addq(dst.gp(), kScratchRegister);
1321     } else {
1322       leaq(dst.gp(), Operand(lhs.gp(), kScratchRegister, times_1, 0));
1323     }
1324   } else if (lhs.gp() == dst.gp()) {
1325     addq(dst.gp(), Immediate(static_cast<int32_t>(imm)));
1326   } else {
1327     leaq(dst.gp(), Operand(lhs.gp(), static_cast<int32_t>(imm)));
1328   }
1329 }
1330 
emit_i64_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1331 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1332                                     LiftoffRegister rhs) {
1333   if (lhs.gp() == rhs.gp()) {
1334     xorq(dst.gp(), dst.gp());
1335   } else if (dst.gp() == rhs.gp()) {
1336     negq(dst.gp());
1337     addq(dst.gp(), lhs.gp());
1338   } else {
1339     if (dst.gp() != lhs.gp()) movq(dst.gp(), lhs.gp());
1340     subq(dst.gp(), rhs.gp());
1341   }
1342 }
1343 
emit_i64_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1344 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1345                                     LiftoffRegister rhs) {
1346   liftoff::EmitCommutativeBinOp<&Assembler::imulq, &Assembler::movq>(
1347       this, dst.gp(), lhs.gp(), rhs.gp());
1348 }
1349 
emit_i64_divs(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero,Label * trap_div_unrepresentable)1350 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1351                                      LiftoffRegister rhs,
1352                                      Label* trap_div_by_zero,
1353                                      Label* trap_div_unrepresentable) {
1354   liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kDiv>(
1355       this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero,
1356       trap_div_unrepresentable);
1357   return true;
1358 }
1359 
emit_i64_divu(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1360 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1361                                      LiftoffRegister rhs,
1362                                      Label* trap_div_by_zero) {
1363   liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kDiv>(
1364       this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
1365   return true;
1366 }
1367 
1368 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1369                                      LiftoffRegister rhs,
1370                                      Label* trap_div_by_zero) {
1371   liftoff::EmitIntDivOrRem<int64_t, liftoff::DivOrRem::kRem>(
1372       this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
1373   return true;
1374 }
1375 
1376 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1377                                      LiftoffRegister rhs,
1378                                      Label* trap_div_by_zero) {
1379   liftoff::EmitIntDivOrRem<uint64_t, liftoff::DivOrRem::kRem>(
1380       this, dst.gp(), lhs.gp(), rhs.gp(), trap_div_by_zero, nullptr);
1381   return true;
1382 }
1383 
1384 void LiftoffAssembler::emit_i64_and(LiftoffRegister dst, LiftoffRegister lhs,
1385                                     LiftoffRegister rhs) {
1386   liftoff::EmitCommutativeBinOp<&Assembler::andq, &Assembler::movq>(
1387       this, dst.gp(), lhs.gp(), rhs.gp());
1388 }
1389 
1390 void LiftoffAssembler::emit_i64_andi(LiftoffRegister dst, LiftoffRegister lhs,
1391                                      int32_t imm) {
1392   liftoff::EmitCommutativeBinOpImm<&Assembler::andq, &Assembler::movq>(
1393       this, dst.gp(), lhs.gp(), imm);
1394 }
1395 
1396 void LiftoffAssembler::emit_i64_or(LiftoffRegister dst, LiftoffRegister lhs,
1397                                    LiftoffRegister rhs) {
1398   liftoff::EmitCommutativeBinOp<&Assembler::orq, &Assembler::movq>(
1399       this, dst.gp(), lhs.gp(), rhs.gp());
1400 }
1401 
1402 void LiftoffAssembler::emit_i64_ori(LiftoffRegister dst, LiftoffRegister lhs,
1403                                     int32_t imm) {
1404   liftoff::EmitCommutativeBinOpImm<&Assembler::orq, &Assembler::movq>(
1405       this, dst.gp(), lhs.gp(), imm);
1406 }
1407 
1408 void LiftoffAssembler::emit_i64_xor(LiftoffRegister dst, LiftoffRegister lhs,
1409                                     LiftoffRegister rhs) {
1410   liftoff::EmitCommutativeBinOp<&Assembler::xorq, &Assembler::movq>(
1411       this, dst.gp(), lhs.gp(), rhs.gp());
1412 }
1413 
1414 void LiftoffAssembler::emit_i64_xori(LiftoffRegister dst, LiftoffRegister lhs,
1415                                      int32_t imm) {
1416   liftoff::EmitCommutativeBinOpImm<&Assembler::xorq, &Assembler::movq>(
1417       this, dst.gp(), lhs.gp(), imm);
1418 }
1419 
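// i64 shifts: variable-amount shifts go through EmitShiftOperation, which uses
// the *_cl instruction forms (x64 takes a variable shift count in cl).
// Immediate shifts mask the amount to 6 bits (& 63), matching wasm shift
// semantics.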
1420 void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1421                                     Register amount) {
1422   liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
1423                                     &Assembler::shlq_cl);
1424 }
1425 
1426 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1427                                      int32_t amount) {
1428   if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
1429   shlq(dst.gp(), Immediate(amount & 63));
1430 }
1431 
1432 void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1433                                     Register amount) {
1434   liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
1435                                     &Assembler::sarq_cl);
1436 }
1437 
1438 void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1439                                      int32_t amount) {
1440   if (dst.gp() != src.gp()) movq(dst.gp(), src.gp());
1441   sarq(dst.gp(), Immediate(amount & 63));
1442 }
1443 
1444 void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1445                                     Register amount) {
1446   liftoff::EmitShiftOperation<kI64>(this, dst.gp(), src.gp(), amount,
1447                                     &Assembler::shrq_cl);
1448 }
1449 
1450 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1451                                      int32_t amount) {
1452   if (dst != src) movq(dst.gp(), src.gp());
1453   shrq(dst.gp(), Immediate(amount & 63));
1454 }
1455 
1456 void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1457   Lzcntq(dst.gp(), src.gp());
1458 }
1459 
1460 void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1461   Tzcntq(dst.gp(), src.gp());
1462 }
1463 
1464 bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1465                                        LiftoffRegister src) {
1466   if (!CpuFeatures::IsSupported(POPCNT)) return false;
1467   CpuFeatureScope scope(this, POPCNT);
1468   popcntq(dst.gp(), src.gp());
1469   return true;
1470 }
1471 
1472 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1473   SmiAddConstant(Operand(dst.gp(), offset), Smi::FromInt(1));
1474 }
1475 
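// On x64 a 32-bit mov implicitly zero-extends into the full 64-bit register,
// so converting a u32 to uintptr only needs a movl.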
1476 void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
1477   movl(dst, src);
1478 }
1479 
1480 void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
1481                                     DoubleRegister rhs) {
1482   if (CpuFeatures::IsSupported(AVX)) {
1483     CpuFeatureScope scope(this, AVX);
1484     vaddss(dst, lhs, rhs);
1485   } else if (dst == rhs) {
1486     addss(dst, lhs);
1487   } else {
1488     if (dst != lhs) movss(dst, lhs);
1489     addss(dst, rhs);
1490   }
1491 }
1492 
1493 void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
1494                                     DoubleRegister rhs) {
1495   if (CpuFeatures::IsSupported(AVX)) {
1496     CpuFeatureScope scope(this, AVX);
1497     vsubss(dst, lhs, rhs);
1498   } else if (dst == rhs) {
1499     movss(kScratchDoubleReg, rhs);
1500     movss(dst, lhs);
1501     subss(dst, kScratchDoubleReg);
1502   } else {
1503     if (dst != lhs) movss(dst, lhs);
1504     subss(dst, rhs);
1505   }
1506 }
1507 
1508 void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
1509                                     DoubleRegister rhs) {
1510   if (CpuFeatures::IsSupported(AVX)) {
1511     CpuFeatureScope scope(this, AVX);
1512     vmulss(dst, lhs, rhs);
1513   } else if (dst == rhs) {
1514     mulss(dst, lhs);
1515   } else {
1516     if (dst != lhs) movss(dst, lhs);
1517     mulss(dst, rhs);
1518   }
1519 }
1520 
1521 void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
1522                                     DoubleRegister rhs) {
1523   if (CpuFeatures::IsSupported(AVX)) {
1524     CpuFeatureScope scope(this, AVX);
1525     vdivss(dst, lhs, rhs);
1526   } else if (dst == rhs) {
1527     movss(kScratchDoubleReg, rhs);
1528     movss(dst, lhs);
1529     divss(dst, kScratchDoubleReg);
1530   } else {
1531     if (dst != lhs) movss(dst, lhs);
1532     divss(dst, rhs);
1533   }
1534 }
1535 
1536 namespace liftoff {
1537 enum class MinOrMax : uint8_t { kMin, kMax };
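// Wasm f32/f64 min/max cannot use minss/maxss directly: the wasm semantics
// require NaN propagation and treat -0.0 as smaller than +0.0. The helper
// below therefore compares explicitly, materializes a NaN (0/0) for the
// unordered case, and uses the sign bit of {rhs} to order equal-magnitude
// zeros.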
1538 template <typename type>
1539 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
1540                               DoubleRegister lhs, DoubleRegister rhs,
1541                               MinOrMax min_or_max) {
1542   Label is_nan;
1543   Label lhs_below_rhs;
1544   Label lhs_above_rhs;
1545   Label done;
1546 
1547 #define dop(name, ...)            \
1548   do {                            \
1549     if (sizeof(type) == 4) {      \
1550       assm->name##s(__VA_ARGS__); \
1551     } else {                      \
1552       assm->name##d(__VA_ARGS__); \
1553     }                             \
1554   } while (false)
1555 
1556   // Check the easy cases first: nan (e.g. unordered), smaller and greater.
1557   // NaN has to be checked first, because PF=1 implies CF=1.
1558   dop(Ucomis, lhs, rhs);
1559   assm->j(parity_even, &is_nan, Label::kNear);   // PF=1
1560   assm->j(below, &lhs_below_rhs, Label::kNear);  // CF=1
1561   assm->j(above, &lhs_above_rhs, Label::kNear);  // CF=0 && ZF=0
1562 
1563   // If we get here, then either
1564   // a) {lhs == rhs},
1565   // b) {lhs == -0.0} and {rhs == 0.0}, or
1566   // c) {lhs == 0.0} and {rhs == -0.0}.
1567   // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
1568   // bit of {rhs} to differentiate b) and c).
1569   dop(Movmskp, kScratchRegister, rhs);
1570   assm->testl(kScratchRegister, Immediate(1));
1571   assm->j(zero, &lhs_below_rhs, Label::kNear);
1572   assm->jmp(&lhs_above_rhs, Label::kNear);
1573 
1574   assm->bind(&is_nan);
1575   // Create a NaN output.
1576   dop(Xorp, dst, dst);
1577   dop(Divs, dst, dst);
1578   assm->jmp(&done, Label::kNear);
1579 
1580   assm->bind(&lhs_below_rhs);
1581   DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
1582   if (dst != lhs_below_rhs_src) dop(Movs, dst, lhs_below_rhs_src);
1583   assm->jmp(&done, Label::kNear);
1584 
1585   assm->bind(&lhs_above_rhs);
1586   DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
1587   if (dst != lhs_above_rhs_src) dop(Movs, dst, lhs_above_rhs_src);
1588 
1589   assm->bind(&done);
1590 }
1591 }  // namespace liftoff
1592 
1593 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1594                                     DoubleRegister rhs) {
1595   liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1596                                     liftoff::MinOrMax::kMin);
1597 }
1598 
1599 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
1600                                     DoubleRegister rhs) {
1601   liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1602                                     liftoff::MinOrMax::kMax);
1603 }
1604 
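// copysign is done in integer registers: clear the sign bit of {lhs}, isolate
// the sign bit of {rhs}, OR the two, and move the result back into an XMM
// register.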
1605 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1606                                          DoubleRegister rhs) {
1607   static constexpr int kF32SignBit = 1 << 31;
1608   Movd(kScratchRegister, lhs);
1609   andl(kScratchRegister, Immediate(~kF32SignBit));
1610   Movd(liftoff::kScratchRegister2, rhs);
1611   andl(liftoff::kScratchRegister2, Immediate(kF32SignBit));
1612   orl(kScratchRegister, liftoff::kScratchRegister2);
1613   Movd(dst, kScratchRegister);
1614 }
1615 
1616 void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
1617   static constexpr uint32_t kSignBit = uint32_t{1} << 31;
1618   if (dst == src) {
1619     TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
1620     Andps(dst, kScratchDoubleReg);
1621   } else {
1622     TurboAssembler::Move(dst, kSignBit - 1);
1623     Andps(dst, src);
1624   }
1625 }
1626 
1627 void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
1628   static constexpr uint32_t kSignBit = uint32_t{1} << 31;
1629   if (dst == src) {
1630     TurboAssembler::Move(kScratchDoubleReg, kSignBit);
1631     Xorps(dst, kScratchDoubleReg);
1632   } else {
1633     TurboAssembler::Move(dst, kSignBit);
1634     Xorps(dst, src);
1635   }
1636 }
1637 
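// The rounding operations (roundss/roundsd) require SSE4.1; without it these
// emitters report failure (return false) and leave the operation to a
// fallback path.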
1638 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
1639   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1640   Roundss(dst, src, kRoundUp);
1641   return true;
1642 }
1643 
1644 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
1645   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1646   Roundss(dst, src, kRoundDown);
1647   return true;
1648 }
1649 
1650 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
1651   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1652   Roundss(dst, src, kRoundToZero);
1653   return true;
1654 }
1655 
1656 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
1657                                             DoubleRegister src) {
1658   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1659   Roundss(dst, src, kRoundToNearest);
1660   return true;
1661 }
1662 
1663 void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
1664   Sqrtss(dst, src);
1665 }
1666 
1667 void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
1668                                     DoubleRegister rhs) {
1669   if (CpuFeatures::IsSupported(AVX)) {
1670     CpuFeatureScope scope(this, AVX);
1671     vaddsd(dst, lhs, rhs);
1672   } else if (dst == rhs) {
1673     addsd(dst, lhs);
1674   } else {
1675     if (dst != lhs) movsd(dst, lhs);
1676     addsd(dst, rhs);
1677   }
1678 }
1679 
1680 void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
1681                                     DoubleRegister rhs) {
1682   if (CpuFeatures::IsSupported(AVX)) {
1683     CpuFeatureScope scope(this, AVX);
1684     vsubsd(dst, lhs, rhs);
1685   } else if (dst == rhs) {
1686     movsd(kScratchDoubleReg, rhs);
1687     movsd(dst, lhs);
1688     subsd(dst, kScratchDoubleReg);
1689   } else {
1690     if (dst != lhs) movsd(dst, lhs);
1691     subsd(dst, rhs);
1692   }
1693 }
1694 
1695 void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
1696                                     DoubleRegister rhs) {
1697   if (CpuFeatures::IsSupported(AVX)) {
1698     CpuFeatureScope scope(this, AVX);
1699     vmulsd(dst, lhs, rhs);
1700   } else if (dst == rhs) {
1701     mulsd(dst, lhs);
1702   } else {
1703     if (dst != lhs) movsd(dst, lhs);
1704     mulsd(dst, rhs);
1705   }
1706 }
1707 
1708 void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
1709                                     DoubleRegister rhs) {
1710   if (CpuFeatures::IsSupported(AVX)) {
1711     CpuFeatureScope scope(this, AVX);
1712     vdivsd(dst, lhs, rhs);
1713   } else if (dst == rhs) {
1714     movsd(kScratchDoubleReg, rhs);
1715     movsd(dst, lhs);
1716     divsd(dst, kScratchDoubleReg);
1717   } else {
1718     if (dst != lhs) movsd(dst, lhs);
1719     divsd(dst, rhs);
1720   }
1721 }
1722 
1723 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
1724                                     DoubleRegister rhs) {
1725   liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
1726                                      liftoff::MinOrMax::kMin);
1727 }
1728 
1729 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1730                                          DoubleRegister rhs) {
1731   // Extract sign bit from {rhs} into {kScratchRegister2}.
1732   Movq(liftoff::kScratchRegister2, rhs);
1733   shrq(liftoff::kScratchRegister2, Immediate(63));
1734   shlq(liftoff::kScratchRegister2, Immediate(63));
1735   // Reset sign bit of {lhs} (in {kScratchRegister}).
1736   Movq(kScratchRegister, lhs);
1737   btrq(kScratchRegister, Immediate(63));
1738   // Combine both values into {kScratchRegister} and move into {dst}.
1739   orq(kScratchRegister, liftoff::kScratchRegister2);
1740   Movq(dst, kScratchRegister);
1741 }
1742 
1743 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
1744                                     DoubleRegister rhs) {
1745   liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
1746                                      liftoff::MinOrMax::kMax);
1747 }
1748 
1749 void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
1750   static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1751   if (dst == src) {
1752     TurboAssembler::Move(kScratchDoubleReg, kSignBit - 1);
1753     Andpd(dst, kScratchDoubleReg);
1754   } else {
1755     TurboAssembler::Move(dst, kSignBit - 1);
1756     Andpd(dst, src);
1757   }
1758 }
1759 
1760 void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
1761   static constexpr uint64_t kSignBit = uint64_t{1} << 63;
1762   if (dst == src) {
1763     TurboAssembler::Move(kScratchDoubleReg, kSignBit);
1764     Xorpd(dst, kScratchDoubleReg);
1765   } else {
1766     TurboAssembler::Move(dst, kSignBit);
1767     Xorpd(dst, src);
1768   }
1769 }
1770 
1771 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
1772   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1773   Roundsd(dst, src, kRoundUp);
1774   return true;
1775 }
1776 
1777 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
1778   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1779   Roundsd(dst, src, kRoundDown);
1780   return true;
1781 }
1782 
1783 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
1784   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1785   Roundsd(dst, src, kRoundToZero);
1786   return true;
1787 }
1788 
1789 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
1790                                             DoubleRegister src) {
1791   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
1792   Roundsd(dst, src, kRoundToNearest);
1793   return true;
1794 }
1795 
1796 void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
1797   Sqrtsd(dst, src);
1798 }
1799 
1800 namespace liftoff {
1801 #define __ assm->
1802 // Used for float to int conversions. If the value in {converted_back} equals
1803 // {src} afterwards, the conversion succeeded.
1804 template <typename dst_type, typename src_type>
1805 inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
1806                                      DoubleRegister src,
1807                                      DoubleRegister converted_back) {
1808   if (std::is_same<double, src_type>::value) {  // f64
1809     if (std::is_same<int32_t, dst_type>::value) {  // f64 -> i32
1810       __ Cvttsd2si(dst, src);
1811       __ Cvtlsi2sd(converted_back, dst);
1812     } else if (std::is_same<uint32_t, dst_type>::value) {  // f64 -> u32
1813       __ Cvttsd2siq(dst, src);
1814       __ movl(dst, dst);
1815       __ Cvtqsi2sd(converted_back, dst);
1816     } else if (std::is_same<int64_t, dst_type>::value) {  // f64 -> i64
1817       __ Cvttsd2siq(dst, src);
1818       __ Cvtqsi2sd(converted_back, dst);
1819     } else {
1820       UNREACHABLE();
1821     }
1822   } else {                                  // f32
1823     if (std::is_same<int32_t, dst_type>::value) {  // f32 -> i32
1824       __ Cvttss2si(dst, src);
1825       __ Cvtlsi2ss(converted_back, dst);
1826     } else if (std::is_same<uint32_t, dst_type>::value) {  // f32 -> u32
1827       __ Cvttss2siq(dst, src);
1828       __ movl(dst, dst);
1829       __ Cvtqsi2ss(converted_back, dst);
1830     } else if (std::is_same<int64_t, dst_type>::value) {  // f32 -> i64
1831       __ Cvttss2siq(dst, src);
1832       __ Cvtqsi2ss(converted_back, dst);
1833     } else {
1834       UNREACHABLE();
1835     }
1836   }
1837 }
1838 
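// Checked float->int truncation: round the input toward zero, convert to the
// integer type, convert the result back to float, and compare it with the
// rounded input. A NaN input (PF=1 after the Ucomis comparison) or a mismatch
// indicates that the conversion was invalid, and control transfers to {trap}.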
1839 template <typename dst_type, typename src_type>
1840 inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
1841                                    DoubleRegister src, Label* trap) {
1842   if (!CpuFeatures::IsSupported(SSE4_1)) {
1843     __ bailout(kMissingCPUFeature, "no SSE4.1");
1844     return true;
1845   }
1846   CpuFeatureScope feature(assm, SSE4_1);
1847 
1848   DoubleRegister rounded = kScratchDoubleReg;
1849   DoubleRegister converted_back = kScratchDoubleReg2;
1850 
1851   if (std::is_same<double, src_type>::value) {  // f64
1852     __ Roundsd(rounded, src, kRoundToZero);
1853   } else {  // f32
1854     __ Roundss(rounded, src, kRoundToZero);
1855   }
1856   ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
1857                                                converted_back);
1858   if (std::is_same<double, src_type>::value) {  // f64
1859     __ Ucomisd(converted_back, rounded);
1860   } else {  // f32
1861     __ Ucomiss(converted_back, rounded);
1862   }
1863 
1864   // Jump to trap if PF is 1 (one of the operands was NaN) or the values are
1865   // not equal.
1866   __ j(parity_even, trap);
1867   __ j(not_equal, trap);
1868   return true;
1869 }
1870 
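// Saturating float->int truncation: same round/convert/convert-back check as
// above, but instead of trapping, NaN inputs produce 0 and out-of-range inputs
// are clamped to the destination type's minimum or maximum, depending on the
// sign of {src}.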
1871 template <typename dst_type, typename src_type>
1872 inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
1873                                       DoubleRegister src) {
1874   if (!CpuFeatures::IsSupported(SSE4_1)) {
1875     __ bailout(kMissingCPUFeature, "no SSE4.1");
1876     return true;
1877   }
1878   CpuFeatureScope feature(assm, SSE4_1);
1879 
1880   Label done;
1881   Label not_nan;
1882   Label src_positive;
1883 
1884   DoubleRegister rounded = kScratchDoubleReg;
1885   DoubleRegister converted_back = kScratchDoubleReg2;
1886   DoubleRegister zero_reg = kScratchDoubleReg;
1887 
1888   if (std::is_same<double, src_type>::value) {  // f64
1889     __ Roundsd(rounded, src, kRoundToZero);
1890   } else {  // f32
1891     __ Roundss(rounded, src, kRoundToZero);
1892   }
1893 
1894   ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
1895                                                converted_back);
1896   if (std::is_same<double, src_type>::value) {  // f64
1897     __ Ucomisd(converted_back, rounded);
1898   } else {  // f32
1899     __ Ucomiss(converted_back, rounded);
1900   }
1901 
1902   // Return 0 if PF is 1 (one of the operands was NaN).
1903   __ j(parity_odd, &not_nan);
1904   __ xorl(dst, dst);
1905   __ jmp(&done);
1906 
1907   __ bind(&not_nan);
1908   // If rounding is as expected, return result
1909   __ j(equal, &done);
1910 
1911   __ xorpd(zero_reg, zero_reg);
1912 
1913   // if out-of-bounds, check if src is positive
1914   if (std::is_same<double, src_type>::value) {  // f64
1915     __ Ucomisd(src, zero_reg);
1916   } else {  // f32
1917     __ Ucomiss(src, zero_reg);
1918   }
1919   __ j(above, &src_positive);
1920   if (std::is_same<int32_t, dst_type>::value ||
1921       std::is_same<uint32_t, dst_type>::value) {  // i32
1922     __ movl(
1923         dst,
1924         Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::min())));
1925   } else if (std::is_same<int64_t, dst_type>::value) {  // i64
1926     __ movq(dst, Immediate64(std::numeric_limits<dst_type>::min()));
1927   } else {
1928     UNREACHABLE();
1929   }
1930   __ jmp(&done);
1931 
1932   __ bind(&src_positive);
1933   if (std::is_same<int32_t, dst_type>::value ||
1934       std::is_same<uint32_t, dst_type>::value) {  // i32
1935     __ movl(
1936         dst,
1937         Immediate(static_cast<int32_t>(std::numeric_limits<dst_type>::max())));
1938   } else if (std::is_same<int64_t, dst_type>::value) {  // i64
1939     __ movq(dst, Immediate64(std::numeric_limits<dst_type>::max()));
1940   } else {
1941     UNREACHABLE();
1942   }
1943 
1944   __ bind(&done);
1945   return true;
1946 }
1947 
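// Saturating float->u64 truncation: negative or NaN inputs produce 0, in-range
// values use Cvttss2uiq/Cvttsd2uiq, and the overflow path clamps to the
// maximum u64 value.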
1948 template <typename src_type>
1949 inline bool EmitSatTruncateFloatToUInt64(LiftoffAssembler* assm, Register dst,
1950                                          DoubleRegister src) {
1951   if (!CpuFeatures::IsSupported(SSE4_1)) {
1952     __ bailout(kMissingCPUFeature, "no SSE4.1");
1953     return true;
1954   }
1955   CpuFeatureScope feature(assm, SSE4_1);
1956 
1957   Label done;
1958   Label neg_or_nan;
1959   Label overflow;
1960 
1961   DoubleRegister zero_reg = kScratchDoubleReg;
1962 
1963   __ xorpd(zero_reg, zero_reg);
1964   if (std::is_same<double, src_type>::value) {  // f64
1965     __ Ucomisd(src, zero_reg);
1966   } else {  // f32
1967     __ Ucomiss(src, zero_reg);
1968   }
1969   // Check if NaN
1970   __ j(parity_even, &neg_or_nan);
1971   __ j(below, &neg_or_nan);
1972   if (std::is_same<double, src_type>::value) {  // f64
1973     __ Cvttsd2uiq(dst, src, &overflow);
1974   } else {  // f32
1975     __ Cvttss2uiq(dst, src, &overflow);
1976   }
1977   __ jmp(&done);
1978 
1979   __ bind(&neg_or_nan);
1980   __ movq(dst, zero_reg);
1981   __ jmp(&done);
1982 
1983   __ bind(&overflow);
1984   __ movq(dst, Immediate64(std::numeric_limits<uint64_t>::max()));
1985   __ bind(&done);
1986   return true;
1987 }
1988 #undef __
1989 }  // namespace liftoff
1990 
1991 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
1992                                             LiftoffRegister dst,
1993                                             LiftoffRegister src, Label* trap) {
1994   switch (opcode) {
1995     case kExprI32ConvertI64:
1996       movl(dst.gp(), src.gp());
1997       return true;
1998     case kExprI32SConvertF32:
1999       return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2000                                                              src.fp(), trap);
2001     case kExprI32UConvertF32:
2002       return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2003                                                               src.fp(), trap);
2004     case kExprI32SConvertF64:
2005       return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2006                                                               src.fp(), trap);
2007     case kExprI32UConvertF64:
2008       return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2009                                                                src.fp(), trap);
2010     case kExprI32SConvertSatF32:
2011       return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2012                                                                 src.fp());
2013     case kExprI32UConvertSatF32:
2014       return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2015                                                                  src.fp());
2016     case kExprI32SConvertSatF64:
2017       return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2018                                                                  src.fp());
2019     case kExprI32UConvertSatF64:
2020       return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
2021           this, dst.gp(), src.fp());
2022     case kExprI32ReinterpretF32:
2023       Movd(dst.gp(), src.fp());
2024       return true;
2025     case kExprI64SConvertI32:
2026       movsxlq(dst.gp(), src.gp());
2027       return true;
2028     case kExprI64SConvertF32:
2029       return liftoff::EmitTruncateFloatToInt<int64_t, float>(this, dst.gp(),
2030                                                              src.fp(), trap);
2031     case kExprI64UConvertF32: {
2032       RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2033       Cvttss2uiq(dst.gp(), src.fp(), trap);
2034       return true;
2035     }
2036     case kExprI64SConvertF64:
2037       return liftoff::EmitTruncateFloatToInt<int64_t, double>(this, dst.gp(),
2038                                                               src.fp(), trap);
2039     case kExprI64UConvertF64: {
2040       RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2041       Cvttsd2uiq(dst.gp(), src.fp(), trap);
2042       return true;
2043     }
2044     case kExprI64SConvertSatF32:
2045       return liftoff::EmitSatTruncateFloatToInt<int64_t, float>(this, dst.gp(),
2046                                                                 src.fp());
2047     case kExprI64UConvertSatF32: {
2048       return liftoff::EmitSatTruncateFloatToUInt64<float>(this, dst.gp(),
2049                                                           src.fp());
2050     }
2051     case kExprI64SConvertSatF64:
2052       return liftoff::EmitSatTruncateFloatToInt<int64_t, double>(this, dst.gp(),
2053                                                                  src.fp());
2054     case kExprI64UConvertSatF64: {
2055       return liftoff::EmitSatTruncateFloatToUInt64<double>(this, dst.gp(),
2056                                                            src.fp());
2057     }
2058     case kExprI64UConvertI32:
2059       AssertZeroExtended(src.gp());
2060       if (dst.gp() != src.gp()) movl(dst.gp(), src.gp());
2061       return true;
2062     case kExprI64ReinterpretF64:
2063       Movq(dst.gp(), src.fp());
2064       return true;
2065     case kExprF32SConvertI32:
2066       Cvtlsi2ss(dst.fp(), src.gp());
2067       return true;
2068     case kExprF32UConvertI32:
2069       movl(kScratchRegister, src.gp());
2070       Cvtqsi2ss(dst.fp(), kScratchRegister);
2071       return true;
2072     case kExprF32SConvertI64:
2073       Cvtqsi2ss(dst.fp(), src.gp());
2074       return true;
2075     case kExprF32UConvertI64:
2076       Cvtqui2ss(dst.fp(), src.gp());
2077       return true;
2078     case kExprF32ConvertF64:
2079       Cvtsd2ss(dst.fp(), src.fp());
2080       return true;
2081     case kExprF32ReinterpretI32:
2082       Movd(dst.fp(), src.gp());
2083       return true;
2084     case kExprF64SConvertI32:
2085       Cvtlsi2sd(dst.fp(), src.gp());
2086       return true;
2087     case kExprF64UConvertI32:
2088       movl(kScratchRegister, src.gp());
2089       Cvtqsi2sd(dst.fp(), kScratchRegister);
2090       return true;
2091     case kExprF64SConvertI64:
2092       Cvtqsi2sd(dst.fp(), src.gp());
2093       return true;
2094     case kExprF64UConvertI64:
2095       Cvtqui2sd(dst.fp(), src.gp());
2096       return true;
2097     case kExprF64ConvertF32:
2098       Cvtss2sd(dst.fp(), src.fp());
2099       return true;
2100     case kExprF64ReinterpretI64:
2101       Movq(dst.fp(), src.gp());
2102       return true;
2103     default:
2104       UNREACHABLE();
2105   }
2106 }
2107 
2108 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2109   movsxbl(dst, src);
2110 }
2111 
2112 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2113   movsxwl(dst, src);
2114 }
2115 
2116 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2117                                               LiftoffRegister src) {
2118   movsxbq(dst.gp(), src.gp());
2119 }
2120 
2121 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2122                                                LiftoffRegister src) {
2123   movsxwq(dst.gp(), src.gp());
2124 }
2125 
2126 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2127                                                LiftoffRegister src) {
2128   movsxlq(dst.gp(), src.gp());
2129 }
2130 
2131 void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
2132 
2133 void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2134 
2135 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2136                                       Label* label, ValueKind kind,
2137                                       Register lhs, Register rhs) {
2138   Condition cond = liftoff::ToCondition(liftoff_cond);
2139   if (rhs != no_reg) {
2140     switch (kind) {
2141       case kI32:
2142         cmpl(lhs, rhs);
2143         break;
2144       case kRef:
2145       case kOptRef:
2146       case kRtt:
2147         DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
2148         V8_FALLTHROUGH;
2149       case kI64:
2150         cmpq(lhs, rhs);
2151         break;
2152       default:
2153         UNREACHABLE();
2154     }
2155   } else {
2156     DCHECK_EQ(kind, kI32);
2157     testl(lhs, lhs);
2158   }
2159 
2160   j(cond, label);
2161 }
2162 
2163 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2164                                            Label* label, Register lhs,
2165                                            int imm) {
2166   Condition cond = liftoff::ToCondition(liftoff_cond);
2167   cmpl(lhs, Immediate(imm));
2168   j(cond, label);
2169 }
2170 
2171 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2172                                                    int subtrahend,
2173                                                    Label* result_negative) {
2174   subl(value, Immediate(subtrahend));
2175   j(negative, result_negative);
2176 }
2177 
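// The set_cond/eqz emitters rely on setcc, which writes only the low byte of
// the destination; movzxbl zero-extends it afterwards to produce a clean 0/1
// i32 result.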
2178 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2179   testl(src, src);
2180   setcc(equal, dst);
2181   movzxbl(dst, dst);
2182 }
2183 
2184 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2185                                          Register dst, Register lhs,
2186                                          Register rhs) {
2187   Condition cond = liftoff::ToCondition(liftoff_cond);
2188   cmpl(lhs, rhs);
2189   setcc(cond, dst);
2190   movzxbl(dst, dst);
2191 }
2192 
2193 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2194   testq(src.gp(), src.gp());
2195   setcc(equal, dst);
2196   movzxbl(dst, dst);
2197 }
2198 
2199 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2200                                          Register dst, LiftoffRegister lhs,
2201                                          LiftoffRegister rhs) {
2202   Condition cond = liftoff::ToCondition(liftoff_cond);
2203   cmpq(lhs.gp(), rhs.gp());
2204   setcc(cond, dst);
2205   movzxbl(dst, dst);
2206 }
2207 
2208 namespace liftoff {
2209 template <void (SharedTurboAssembler::*cmp_op)(DoubleRegister, DoubleRegister)>
2210 void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
2211                       DoubleRegister lhs, DoubleRegister rhs) {
2212   Label cont;
2213   Label not_nan;
2214 
2215   (assm->*cmp_op)(lhs, rhs);
2216   // If PF is one, one of the operands was NaN. This needs special handling.
2217   assm->j(parity_odd, &not_nan, Label::kNear);
2218   // Return 1 for f32.ne / f64.ne, 0 for all other cases.
2219   if (cond == not_equal) {
2220     assm->movl(dst, Immediate(1));
2221   } else {
2222     assm->xorl(dst, dst);
2223   }
2224   assm->jmp(&cont, Label::kNear);
2225   assm->bind(&not_nan);
2226 
2227   assm->setcc(cond, dst);
2228   assm->movzxbl(dst, dst);
2229   assm->bind(&cont);
2230 }
2231 }  // namespace liftoff
2232 
2233 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2234                                          Register dst, DoubleRegister lhs,
2235                                          DoubleRegister rhs) {
2236   Condition cond = liftoff::ToCondition(liftoff_cond);
2237   liftoff::EmitFloatSetCond<&TurboAssembler::Ucomiss>(this, cond, dst, lhs,
2238                                                       rhs);
2239 }
2240 
2241 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2242                                          Register dst, DoubleRegister lhs,
2243                                          DoubleRegister rhs) {
2244   Condition cond = liftoff::ToCondition(liftoff_cond);
2245   liftoff::EmitFloatSetCond<&TurboAssembler::Ucomisd>(this, cond, dst, lhs,
2246                                                       rhs);
2247 }
2248 
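// Branchless select for i32/i64 only: test the condition once, then use
// cmovl/cmovq to pick the true or false value. Other value kinds return false,
// leaving them to a generic lowering elsewhere.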
2249 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2250                                    LiftoffRegister true_value,
2251                                    LiftoffRegister false_value,
2252                                    ValueKind kind) {
2253   if (kind != kI32 && kind != kI64) return false;
2254 
2255   testl(condition, condition);
2256 
2257   if (kind == kI32) {
2258     if (dst == false_value) {
2259       cmovl(not_zero, dst.gp(), true_value.gp());
2260     } else {
2261       if (dst != true_value) movl(dst.gp(), true_value.gp());
2262       cmovl(zero, dst.gp(), false_value.gp());
2263     }
2264   } else {
2265     if (dst == false_value) {
2266       cmovq(not_zero, dst.gp(), true_value.gp());
2267     } else {
2268       if (dst != true_value) movq(dst.gp(), true_value.gp());
2269       cmovq(zero, dst.gp(), false_value.gp());
2270     }
2271   }
2272 
2273   return true;
2274 }
2275 
2276 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2277                                       SmiCheckMode mode) {
2278   testb(obj, Immediate(kSmiTagMask));
2279   Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2280   j(condition, target);
2281 }
2282 
2283 // TODO(fanchenk): Distinguish mov* if data bypass delay matter.
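// The EmitSimd* helpers below encapsulate the usual SSE/AVX dichotomy: with
// AVX a single three-operand instruction is emitted; without it the SSE
// two-operand form is used, first moving lhs into dst when they differ.
// Commutative ops can simply apply the op with the other operand when dst
// aliases rhs, while non-commutative ops copy rhs into the scratch register
// first.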
2284 namespace liftoff {
2285 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2286           void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2287 void EmitSimdCommutativeBinOp(
2288     LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2289     LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2290   if (CpuFeatures::IsSupported(AVX)) {
2291     CpuFeatureScope scope(assm, AVX);
2292     (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2293     return;
2294   }
2295 
2296   base::Optional<CpuFeatureScope> sse_scope;
2297   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2298 
2299   if (dst.fp() == rhs.fp()) {
2300     (assm->*sse_op)(dst.fp(), lhs.fp());
2301   } else {
2302     if (dst.fp() != lhs.fp()) (assm->movaps)(dst.fp(), lhs.fp());
2303     (assm->*sse_op)(dst.fp(), rhs.fp());
2304   }
2305 }
2306 
2307 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2308           void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2309 void EmitSimdNonCommutativeBinOp(
2310     LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2311     LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2312   if (CpuFeatures::IsSupported(AVX)) {
2313     CpuFeatureScope scope(assm, AVX);
2314     (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2315     return;
2316   }
2317 
2318   base::Optional<CpuFeatureScope> sse_scope;
2319   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2320 
2321   if (dst.fp() == rhs.fp()) {
2322     assm->movaps(kScratchDoubleReg, rhs.fp());
2323     assm->movaps(dst.fp(), lhs.fp());
2324     (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2325   } else {
2326     if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2327     (assm->*sse_op)(dst.fp(), rhs.fp());
2328   }
2329 }
2330 
2331 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2332           void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
2333 void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
2334                      LiftoffRegister operand, LiftoffRegister count) {
2335   constexpr int mask = (1 << width) - 1;
2336   assm->movq(kScratchRegister, count.gp());
2337   assm->andq(kScratchRegister, Immediate(mask));
2338   assm->Movq(kScratchDoubleReg, kScratchRegister);
2339   if (CpuFeatures::IsSupported(AVX)) {
2340     CpuFeatureScope scope(assm, AVX);
2341     (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
2342   } else {
2343     if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2344     (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2345   }
2346 }
2347 
2348 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
2349           void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
2350 void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
2351                         LiftoffRegister operand, int32_t count) {
2352   constexpr int mask = (1 << width) - 1;
2353   byte shift = static_cast<byte>(count & mask);
2354   if (CpuFeatures::IsSupported(AVX)) {
2355     CpuFeatureScope scope(assm, AVX);
2356     (assm->*avx_op)(dst.fp(), operand.fp(), shift);
2357   } else {
2358     if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2359     (assm->*sse_op)(dst.fp(), shift);
2360   }
2361 }
2362 
2363 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2364                         LiftoffRegister src) {
2365   assm->xorq(dst.gp(), dst.gp());
2366   assm->Ptest(src.fp(), src.fp());
2367   assm->setcc(not_equal, dst.gp());
2368 }
2369 
2370 template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
2371 inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2372                         LiftoffRegister src,
2373                         base::Optional<CpuFeature> feature = base::nullopt) {
2374   base::Optional<CpuFeatureScope> sse_scope;
2375   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2376 
2377   XMMRegister tmp = kScratchDoubleReg;
2378   assm->xorq(dst.gp(), dst.gp());
2379   assm->Pxor(tmp, tmp);
2380   (assm->*pcmp)(tmp, src.fp());
2381   assm->Ptest(tmp, tmp);
2382   assm->setcc(equal, dst.gp());
2383 }
2384 
2385 }  // namespace liftoff
2386 
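// LoadTransform covers the v128 load-and-transform instructions: sign/zero
// extending loads (pmovsx*/pmovzx*), 32/64-bit zero-extending loads
// (movss/movsd), and splat loads. {protected_load_pc} records the code offset
// of the memory access so that a faulting access can be attributed to this
// load (e.g. for trap-handler-based bounds checks).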
2387 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2388                                      Register offset_reg, uintptr_t offset_imm,
2389                                      LoadType type,
2390                                      LoadTransformationKind transform,
2391                                      uint32_t* protected_load_pc) {
2392   Operand src_op = liftoff::GetMemOp(this, src_addr, offset_reg, offset_imm);
2393   *protected_load_pc = pc_offset();
2394   MachineType memtype = type.mem_type();
2395   if (transform == LoadTransformationKind::kExtend) {
2396     if (memtype == MachineType::Int8()) {
2397       Pmovsxbw(dst.fp(), src_op);
2398     } else if (memtype == MachineType::Uint8()) {
2399       Pmovzxbw(dst.fp(), src_op);
2400     } else if (memtype == MachineType::Int16()) {
2401       Pmovsxwd(dst.fp(), src_op);
2402     } else if (memtype == MachineType::Uint16()) {
2403       Pmovzxwd(dst.fp(), src_op);
2404     } else if (memtype == MachineType::Int32()) {
2405       Pmovsxdq(dst.fp(), src_op);
2406     } else if (memtype == MachineType::Uint32()) {
2407       Pmovzxdq(dst.fp(), src_op);
2408     }
2409   } else if (transform == LoadTransformationKind::kZeroExtend) {
2410     if (memtype == MachineType::Int32()) {
2411       Movss(dst.fp(), src_op);
2412     } else {
2413       DCHECK_EQ(MachineType::Int64(), memtype);
2414       Movsd(dst.fp(), src_op);
2415     }
2416   } else {
2417     DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2418     if (memtype == MachineType::Int8()) {
2419       S128Load8Splat(dst.fp(), src_op, kScratchDoubleReg);
2420     } else if (memtype == MachineType::Int16()) {
2421       S128Load16Splat(dst.fp(), src_op, kScratchDoubleReg);
2422     } else if (memtype == MachineType::Int32()) {
2423       S128Load32Splat(dst.fp(), src_op);
2424     } else if (memtype == MachineType::Int64()) {
2425       Movddup(dst.fp(), src_op);
2426     }
2427   }
2428 }
2429 
2430 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2431                                 Register addr, Register offset_reg,
2432                                 uintptr_t offset_imm, LoadType type,
2433                                 uint8_t laneidx, uint32_t* protected_load_pc) {
2434   Operand src_op = liftoff::GetMemOp(this, addr, offset_reg, offset_imm);
2435 
2436   MachineType mem_type = type.mem_type();
2437   if (mem_type == MachineType::Int8()) {
2438     Pinsrb(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2439   } else if (mem_type == MachineType::Int16()) {
2440     Pinsrw(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2441   } else if (mem_type == MachineType::Int32()) {
2442     Pinsrd(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2443   } else {
2444     DCHECK_EQ(MachineType::Int64(), mem_type);
2445     Pinsrq(dst.fp(), src.fp(), src_op, laneidx, protected_load_pc);
2446   }
2447 }
2448 
2449 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2450                                  uintptr_t offset_imm, LiftoffRegister src,
2451                                  StoreType type, uint8_t lane,
2452                                  uint32_t* protected_store_pc) {
2453   Operand dst_op = liftoff::GetMemOp(this, dst, offset, offset_imm);
2454   if (protected_store_pc) *protected_store_pc = pc_offset();
2455   MachineRepresentation rep = type.mem_rep();
2456   if (rep == MachineRepresentation::kWord8) {
2457     Pextrb(dst_op, src.fp(), lane);
2458   } else if (rep == MachineRepresentation::kWord16) {
2459     Pextrw(dst_op, src.fp(), lane);
2460   } else if (rep == MachineRepresentation::kWord32) {
2461     S128Store32Lane(dst_op, src.fp(), lane);
2462   } else {
2463     DCHECK_EQ(MachineRepresentation::kWord64, rep);
2464     S128Store64Lane(dst_op, src.fp(), lane);
2465   }
2466 }
2467 
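// General i8x16 shuffle via two pshufb operations: one mask selects the lanes
// taken from {lhs} and one the lanes taken from {rhs}; indices that refer to
// the other operand are replaced by 0x80, which makes pshufb write zero, so
// OR-ing the two partial results yields the shuffled vector. Swizzles (single
// input) need only one pshufb with the packed lane indices.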
2468 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2469                                           LiftoffRegister lhs,
2470                                           LiftoffRegister rhs,
2471                                           const uint8_t shuffle[16],
2472                                           bool is_swizzle) {
2473   if (is_swizzle) {
2474     uint32_t imms[4];
2475     // Shuffles that use just 1 operand are called swizzles, rhs can be ignored.
2476     wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
2477     TurboAssembler::Move(kScratchDoubleReg, make_uint64(imms[3], imms[2]),
2478                          make_uint64(imms[1], imms[0]));
2479     Pshufb(dst.fp(), lhs.fp(), kScratchDoubleReg);
2480     return;
2481   }
2482 
2483   uint64_t mask1[2] = {};
2484   for (int i = 15; i >= 0; i--) {
2485     uint8_t lane = shuffle[i];
2486     int j = i >> 3;
2487     mask1[j] <<= 8;
2488     mask1[j] |= lane < kSimd128Size ? lane : 0x80;
2489   }
2490   TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask1[1], mask1[0]);
2491   Pshufb(kScratchDoubleReg, lhs.fp(), liftoff::kScratchDoubleReg2);
2492 
2493   uint64_t mask2[2] = {};
2494   for (int i = 15; i >= 0; i--) {
2495     uint8_t lane = shuffle[i];
2496     int j = i >> 3;
2497     mask2[j] <<= 8;
2498     mask2[j] |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
2499   }
2500   TurboAssembler::Move(liftoff::kScratchDoubleReg2, mask2[1], mask2[0]);
2501 
2502   Pshufb(dst.fp(), rhs.fp(), liftoff::kScratchDoubleReg2);
2503   Por(dst.fp(), kScratchDoubleReg);
2504 }
2505 
2506 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2507                                           LiftoffRegister lhs,
2508                                           LiftoffRegister rhs) {
2509   I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg,
2510                kScratchRegister);
2511 }
2512 
2513 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2514                                          LiftoffRegister src) {
2515   I8x16Popcnt(dst.fp(), src.fp(), kScratchDoubleReg,
2516               liftoff::kScratchDoubleReg2, kScratchRegister);
2517 }
2518 
2519 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2520                                         LiftoffRegister src) {
2521   I8x16Splat(dst.fp(), src.gp(), kScratchDoubleReg);
2522 }
2523 
2524 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2525                                         LiftoffRegister src) {
2526   I16x8Splat(dst.fp(), src.gp());
2527 }
2528 
2529 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2530                                         LiftoffRegister src) {
2531   Movd(dst.fp(), src.gp());
2532   Pshufd(dst.fp(), dst.fp(), static_cast<uint8_t>(0));
2533 }
2534 
2535 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2536                                         LiftoffRegister src) {
2537   Movq(dst.fp(), src.gp());
2538   Movddup(dst.fp(), dst.fp());
2539 }
2540 
2541 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2542                                         LiftoffRegister src) {
2543   F32x4Splat(dst.fp(), src.fp());
2544 }
2545 
2546 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2547                                         LiftoffRegister src) {
2548   Movddup(dst.fp(), src.fp());
2549 }
2550 
2551 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2552                                      LiftoffRegister rhs) {
2553   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2554       this, dst, lhs, rhs);
2555 }
2556 
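// SSE/AVX have no packed "not equal" or unsigned comparisons. "ne" is emitted
// as "eq" followed by a bitwise NOT (pcmpeq with all-ones, then pxor), and the
// unsigned gt/ge variants below are built from pmaxu*/pminu* followed by
// pcmpeq (plus a final NOT for gt).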
2557 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2558                                      LiftoffRegister rhs) {
2559   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2560       this, dst, lhs, rhs);
2561   Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2562   Pxor(dst.fp(), kScratchDoubleReg);
2563 }
2564 
2565 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2566                                        LiftoffRegister rhs) {
2567   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
2568                                        &Assembler::pcmpgtb>(this, dst, lhs,
2569                                                             rhs);
2570 }
2571 
2572 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2573                                        LiftoffRegister rhs) {
2574   DoubleRegister ref = rhs.fp();
2575   if (dst == rhs) {
2576     Movaps(kScratchDoubleReg, rhs.fp());
2577     ref = kScratchDoubleReg;
2578   }
2579   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
2580       this, dst, lhs, rhs, SSE4_1);
2581   Pcmpeqb(dst.fp(), ref);
2582   Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2583   Pxor(dst.fp(), kScratchDoubleReg);
2584 }
2585 
2586 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2587                                        LiftoffRegister rhs) {
2588   DoubleRegister ref = rhs.fp();
2589   if (dst == rhs) {
2590     Movaps(kScratchDoubleReg, rhs.fp());
2591     ref = kScratchDoubleReg;
2592   }
2593   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2594       this, dst, lhs, rhs, SSE4_1);
2595   Pcmpeqb(dst.fp(), ref);
2596 }
2597 
2598 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2599                                        LiftoffRegister rhs) {
2600   DoubleRegister ref = rhs.fp();
2601   if (dst == rhs) {
2602     Movaps(kScratchDoubleReg, rhs.fp());
2603     ref = kScratchDoubleReg;
2604   }
2605   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
2606       this, dst, lhs, rhs);
2607   Pcmpeqb(dst.fp(), ref);
2608 }
2609 
emit_i16x8_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2610 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
2611                                      LiftoffRegister rhs) {
2612   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2613       this, dst, lhs, rhs);
2614 }
2615 
emit_i16x8_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2616 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
2617                                      LiftoffRegister rhs) {
2618   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
2619       this, dst, lhs, rhs);
2620   Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2621   Pxor(dst.fp(), kScratchDoubleReg);
2622 }
2623 
emit_i16x8_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2624 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2625                                        LiftoffRegister rhs) {
2626   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
2627                                        &Assembler::pcmpgtw>(this, dst, lhs,
2628                                                             rhs);
2629 }
2630 
emit_i16x8_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2631 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2632                                        LiftoffRegister rhs) {
2633   DoubleRegister ref = rhs.fp();
2634   if (dst == rhs) {
2635     Movaps(kScratchDoubleReg, rhs.fp());
2636     ref = kScratchDoubleReg;
2637   }
2638   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
2639       this, dst, lhs, rhs, SSE4_1);
2640   Pcmpeqw(dst.fp(), ref);
2641   Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2642   Pxor(dst.fp(), kScratchDoubleReg);
2643 }
2644 
emit_i16x8_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2645 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2646                                        LiftoffRegister rhs) {
2647   DoubleRegister ref = rhs.fp();
2648   if (dst == rhs) {
2649     Movaps(kScratchDoubleReg, rhs.fp());
2650     ref = kScratchDoubleReg;
2651   }
2652   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
2653       this, dst, lhs, rhs);
2654   Pcmpeqw(dst.fp(), ref);
2655 }
2656 
emit_i16x8_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2657 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2658                                        LiftoffRegister rhs) {
2659   DoubleRegister ref = rhs.fp();
2660   if (dst == rhs) {
2661     Movaps(kScratchDoubleReg, rhs.fp());
2662     ref = kScratchDoubleReg;
2663   }
2664   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
2665       this, dst, lhs, rhs, SSE4_1);
2666   Pcmpeqw(dst.fp(), ref);
2667 }
2668 
emit_i32x4_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2669 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2670                                      LiftoffRegister rhs) {
2671   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
2672       this, dst, lhs, rhs);
2673 }
2674 
emit_i32x4_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2675 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2676                                      LiftoffRegister rhs) {
2677   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
2678       this, dst, lhs, rhs);
2679   Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2680   Pxor(dst.fp(), kScratchDoubleReg);
2681 }
2682 
emit_i32x4_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2683 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2684                                        LiftoffRegister rhs) {
2685   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
2686                                        &Assembler::pcmpgtd>(this, dst, lhs,
2687                                                             rhs);
2688 }
2689 
emit_i32x4_gt_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2690 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2691                                        LiftoffRegister rhs) {
2692   DoubleRegister ref = rhs.fp();
2693   if (dst == rhs) {
2694     Movaps(kScratchDoubleReg, rhs.fp());
2695     ref = kScratchDoubleReg;
2696   }
2697   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
2698       this, dst, lhs, rhs, SSE4_1);
2699   Pcmpeqd(dst.fp(), ref);
2700   Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2701   Pxor(dst.fp(), kScratchDoubleReg);
2702 }
2703 
emit_i32x4_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2704 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2705                                        LiftoffRegister rhs) {
2706   DoubleRegister ref = rhs.fp();
2707   if (dst == rhs) {
2708     Movaps(kScratchDoubleReg, rhs.fp());
2709     ref = kScratchDoubleReg;
2710   }
2711   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
2712       this, dst, lhs, rhs, SSE4_1);
2713   Pcmpeqd(dst.fp(), ref);
2714 }
2715 
emit_i32x4_ge_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2716 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2717                                        LiftoffRegister rhs) {
2718   DoubleRegister ref = rhs.fp();
2719   if (dst == rhs) {
2720     Movaps(kScratchDoubleReg, rhs.fp());
2721     ref = kScratchDoubleReg;
2722   }
2723   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
2724       this, dst, lhs, rhs, SSE4_1);
2725   Pcmpeqd(dst.fp(), ref);
2726 }
2727 
emit_i64x2_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2728 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2729                                      LiftoffRegister rhs) {
2730   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
2731       this, dst, lhs, rhs, SSE4_1);
2732 }
2733 
emit_i64x2_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2734 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2735                                      LiftoffRegister rhs) {
2736   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
2737       this, dst, lhs, rhs, SSE4_1);
2738   Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
2739   Pxor(dst.fp(), kScratchDoubleReg);
2740 }
2741 
emit_i64x2_gt_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2742 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2743                                        LiftoffRegister rhs) {
2744   // Different register alias requirements depending on CpuFeatures supported:
2745   if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
2746     // 1. AVX or SSE4_2: no requirements (I64x2GtS takes care of aliasing).
2747     I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2748   } else {
2749     // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
2750     if (dst == lhs || dst == rhs) {
2751       I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2752                kScratchDoubleReg);
2753       movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2754     } else {
2755       I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2756     }
2757   }
2758 }
2759 
emit_i64x2_ge_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2760 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2761                                        LiftoffRegister rhs) {
2762   // Different register alias requirements depending on CpuFeatures supported:
2763   if (CpuFeatures::IsSupported(AVX)) {
2764     // 1. AVX, no requirements.
2765     I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2766   } else if (CpuFeatures::IsSupported(SSE4_2)) {
2767     // 2. SSE4_2, dst != lhs.
2768     if (dst == lhs) {
2769       I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2770                kScratchDoubleReg);
2771       movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2772     } else {
2773       I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2774     }
2775   } else {
2776     // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
2777     if (dst == lhs || dst == rhs) {
2778       I64x2GeS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp(),
2779                kScratchDoubleReg);
2780       movaps(dst.fp(), liftoff::kScratchDoubleReg2);
2781     } else {
2782       I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
2783     }
2784   }
2785 }
2786 
emit_f32x4_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2787 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
2788                                      LiftoffRegister rhs) {
2789   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
2790       this, dst, lhs, rhs);
2791 }
2792 
emit_f32x4_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2793 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
2794                                      LiftoffRegister rhs) {
2795   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
2796                                     &Assembler::cmpneqps>(this, dst, lhs, rhs);
2797 }
2798 
emit_f32x4_lt(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2799 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
2800                                      LiftoffRegister rhs) {
2801   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
2802                                        &Assembler::cmpltps>(this, dst, lhs,
2803                                                             rhs);
2804 }
2805 
emit_f32x4_le(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2806 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
2807                                      LiftoffRegister rhs) {
2808   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
2809                                        &Assembler::cmpleps>(this, dst, lhs,
2810                                                             rhs);
2811 }
2812 
emit_f64x2_eq(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2813 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
2814                                      LiftoffRegister rhs) {
2815   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
2816       this, dst, lhs, rhs);
2817 }
2818 
emit_f64x2_ne(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2819 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
2820                                      LiftoffRegister rhs) {
2821   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
2822                                     &Assembler::cmpneqpd>(this, dst, lhs, rhs);
2823 }
2824 
emit_f64x2_lt(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2825 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
2826                                      LiftoffRegister rhs) {
2827   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
2828                                        &Assembler::cmpltpd>(this, dst, lhs,
2829                                                             rhs);
2830 }
2831 
emit_f64x2_le(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2832 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
2833                                      LiftoffRegister rhs) {
2834   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
2835                                        &Assembler::cmplepd>(this, dst, lhs,
2836                                                             rhs);
2837 }
2838 
emit_s128_const(LiftoffRegister dst,const uint8_t imms[16])2839 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
2840                                        const uint8_t imms[16]) {
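  // Split the 16 immediate bytes into two 64-bit halves (vals[0] is the low
  // half on little-endian x64) and materialize them into the XMM register.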
2841   uint64_t vals[2];
2842   memcpy(vals, imms, sizeof(vals));
2843   TurboAssembler::Move(dst.fp(), vals[1], vals[0]);
2844 }
2845 
emit_s128_not(LiftoffRegister dst,LiftoffRegister src)2846 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
2847   S128Not(dst.fp(), src.fp(), kScratchDoubleReg);
2848 }
2849 
emit_s128_and(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2850 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
2851                                      LiftoffRegister rhs) {
2852   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
2853       this, dst, lhs, rhs);
2854 }
2855 
emit_s128_or(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2856 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
2857                                     LiftoffRegister rhs) {
2858   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
2859       this, dst, lhs, rhs);
2860 }
2861 
emit_s128_xor(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2862 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
2863                                      LiftoffRegister rhs) {
2864   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
2865       this, dst, lhs, rhs);
2866 }
2867 
emit_s128_select(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,LiftoffRegister mask)2868 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
2869                                         LiftoffRegister src1,
2870                                         LiftoffRegister src2,
2871                                         LiftoffRegister mask) {
2872   // Ensure that we don't overwrite any inputs with the movaps below.
2873   DCHECK_NE(dst, src1);
2874   DCHECK_NE(dst, src2);
2875   if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
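    // The non-AVX lowering of S128Select expects dst to alias the mask
    // register, so copy the mask into dst first; dst is known to be distinct
    // from both inputs (see the DCHECKs above).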
2876     movaps(dst.fp(), mask.fp());
2877     S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
2878   } else {
2879     S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
2880   }
2881 }
2882 
emit_i8x16_neg(LiftoffRegister dst,LiftoffRegister src)2883 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
2884                                       LiftoffRegister src) {
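  // If dst aliases src, negate in place via psignb with an all-ones (-1) mask;
  // otherwise compute 0 - src into dst.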
2885   if (dst.fp() == src.fp()) {
2886     Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2887     Psignb(dst.fp(), kScratchDoubleReg);
2888   } else {
2889     Pxor(dst.fp(), dst.fp());
2890     Psubb(dst.fp(), src.fp());
2891   }
2892 }
2893 
emit_v128_anytrue(LiftoffRegister dst,LiftoffRegister src)2894 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
2895                                          LiftoffRegister src) {
2896   liftoff::EmitAnyTrue(this, dst, src);
2897 }
2898 
emit_i8x16_alltrue(LiftoffRegister dst,LiftoffRegister src)2899 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
2900                                           LiftoffRegister src) {
2901   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
2902 }
2903 
emit_i8x16_bitmask(LiftoffRegister dst,LiftoffRegister src)2904 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
2905                                           LiftoffRegister src) {
2906   Pmovmskb(dst.gp(), src.fp());
2907 }
2908 
emit_i8x16_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2909 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
2910                                       LiftoffRegister rhs) {
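  // x64 has no packed 8-bit shift, so I8x16Shl synthesizes it (roughly: a
  // wider shift plus masking of the bits that cross lane boundaries), which is
  // why the extra scratch registers are needed.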
2911   I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2912            liftoff::kScratchDoubleReg2);
2913 }
2914 
emit_i8x16_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2915 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
2916                                        int32_t rhs) {
2917   I8x16Shl(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
2918 }
2919 
emit_i8x16_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2920 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
2921                                         LiftoffRegister lhs,
2922                                         LiftoffRegister rhs) {
2923   I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2924             liftoff::kScratchDoubleReg2);
2925 }
2926 
emit_i8x16_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2927 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
2928                                          LiftoffRegister lhs, int32_t rhs) {
2929   I8x16ShrS(dst.fp(), lhs.fp(), rhs, kScratchDoubleReg);
2930 }
2931 
emit_i8x16_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2932 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
2933                                         LiftoffRegister lhs,
2934                                         LiftoffRegister rhs) {
2935   I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), kScratchRegister, kScratchDoubleReg,
2936             liftoff::kScratchDoubleReg2);
2937 }
2938 
emit_i8x16_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)2939 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
2940                                          LiftoffRegister lhs, int32_t rhs) {
2941   I8x16ShrU(dst.fp(), lhs.fp(), rhs, kScratchRegister, kScratchDoubleReg);
2942 }
2943 
emit_i8x16_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2944 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
2945                                       LiftoffRegister rhs) {
2946   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
2947       this, dst, lhs, rhs);
2948 }
2949 
emit_i8x16_add_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2950 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
2951                                             LiftoffRegister lhs,
2952                                             LiftoffRegister rhs) {
2953   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
2954       this, dst, lhs, rhs);
2955 }
2956 
emit_i8x16_add_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2957 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
2958                                             LiftoffRegister lhs,
2959                                             LiftoffRegister rhs) {
2960   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
2961       this, dst, lhs, rhs);
2962 }
2963 
emit_i8x16_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2964 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
2965                                       LiftoffRegister rhs) {
2966   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
2967       this, dst, lhs, rhs);
2968 }
2969 
emit_i8x16_sub_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2970 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
2971                                             LiftoffRegister lhs,
2972                                             LiftoffRegister rhs) {
2973   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
2974       this, dst, lhs, rhs);
2975 }
2976 
emit_i8x16_sub_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2977 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
2978                                             LiftoffRegister lhs,
2979                                             LiftoffRegister rhs) {
2980   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
2981                                        &Assembler::psubusb>(this, dst, lhs,
2982                                                             rhs);
2983 }
2984 
emit_i8x16_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2985 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
2986                                         LiftoffRegister lhs,
2987                                         LiftoffRegister rhs) {
2988   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2989       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
2990 }
2991 
emit_i8x16_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2992 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
2993                                         LiftoffRegister lhs,
2994                                         LiftoffRegister rhs) {
2995   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
2996       this, dst, lhs, rhs);
2997 }
2998 
emit_i8x16_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2999 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3000                                         LiftoffRegister lhs,
3001                                         LiftoffRegister rhs) {
3002   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
3003       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3004 }
3005 
emit_i8x16_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3006 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3007                                         LiftoffRegister lhs,
3008                                         LiftoffRegister rhs) {
3009   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3010       this, dst, lhs, rhs);
3011 }
3012 
emit_i16x8_neg(LiftoffRegister dst,LiftoffRegister src)3013 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3014                                       LiftoffRegister src) {
3015   if (dst.fp() == src.fp()) {
3016     Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3017     Psignw(dst.fp(), kScratchDoubleReg);
3018   } else {
3019     Pxor(dst.fp(), dst.fp());
3020     Psubw(dst.fp(), src.fp());
3021   }
3022 }
3023 
emit_i16x8_alltrue(LiftoffRegister dst,LiftoffRegister src)3024 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3025                                           LiftoffRegister src) {
3026   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
3027 }
3028 
emit_i16x8_bitmask(LiftoffRegister dst,LiftoffRegister src)3029 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3030                                           LiftoffRegister src) {
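  // packsswb packs the eight 16-bit lanes of src into the high eight bytes of
  // the scratch register (its old low half is ignored); pmovmskb then gathers
  // all 16 byte sign bits, and the shift keeps only the eight that came from
  // src.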
3031   XMMRegister tmp = kScratchDoubleReg;
3032   Packsswb(tmp, src.fp());
3033   Pmovmskb(dst.gp(), tmp);
3034   shrq(dst.gp(), Immediate(8));
3035 }
3036 
emit_i16x8_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3037 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3038                                       LiftoffRegister rhs) {
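  // The trailing template argument (4) is log2 of the lane width in bits; it
  // is used to mask the runtime shift count to 0..15, matching wasm's
  // modulo-lane-width shift semantics.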
3039   liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
3040                                                                      lhs, rhs);
3041 }
3042 
emit_i16x8_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3043 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3044                                        int32_t rhs) {
3045   liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
3046       this, dst, lhs, rhs);
3047 }
3048 
emit_i16x8_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3049 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3050                                         LiftoffRegister lhs,
3051                                         LiftoffRegister rhs) {
3052   liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
3053                                                                      lhs, rhs);
3054 }
3055 
emit_i16x8_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3056 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3057                                          LiftoffRegister lhs, int32_t rhs) {
3058   liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
3059       this, dst, lhs, rhs);
3060 }
3061 
emit_i16x8_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3062 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3063                                         LiftoffRegister lhs,
3064                                         LiftoffRegister rhs) {
3065   liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
3066                                                                      lhs, rhs);
3067 }
3068 
emit_i16x8_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3069 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3070                                          LiftoffRegister lhs, int32_t rhs) {
3071   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
3072       this, dst, lhs, rhs);
3073 }
3074 
emit_i16x8_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3075 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3076                                       LiftoffRegister rhs) {
3077   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
3078       this, dst, lhs, rhs);
3079 }
3080 
emit_i16x8_add_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3081 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3082                                             LiftoffRegister lhs,
3083                                             LiftoffRegister rhs) {
3084   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
3085       this, dst, lhs, rhs);
3086 }
3087 
emit_i16x8_add_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3088 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3089                                             LiftoffRegister lhs,
3090                                             LiftoffRegister rhs) {
3091   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
3092       this, dst, lhs, rhs);
3093 }
3094 
emit_i16x8_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3095 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3096                                       LiftoffRegister rhs) {
3097   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
3098       this, dst, lhs, rhs);
3099 }
3100 
emit_i16x8_sub_sat_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3101 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3102                                             LiftoffRegister lhs,
3103                                             LiftoffRegister rhs) {
3104   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
3105       this, dst, lhs, rhs);
3106 }
3107 
emit_i16x8_sub_sat_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3108 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3109                                             LiftoffRegister lhs,
3110                                             LiftoffRegister rhs) {
3111   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
3112                                        &Assembler::psubusw>(this, dst, lhs,
3113                                                             rhs);
3114 }
3115 
emit_i16x8_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3116 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3117                                       LiftoffRegister rhs) {
3118   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
3119       this, dst, lhs, rhs);
3120 }
3121 
emit_i16x8_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3122 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3123                                         LiftoffRegister lhs,
3124                                         LiftoffRegister rhs) {
3125   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3126       this, dst, lhs, rhs);
3127 }
3128 
emit_i16x8_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3129 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3130                                         LiftoffRegister lhs,
3131                                         LiftoffRegister rhs) {
3132   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3133       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3134 }
3135 
emit_i16x8_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3136 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3137                                         LiftoffRegister lhs,
3138                                         LiftoffRegister rhs) {
3139   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
3140       this, dst, lhs, rhs);
3141 }
3142 
emit_i16x8_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3143 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3144                                         LiftoffRegister lhs,
3145                                         LiftoffRegister rhs) {
3146   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3147       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3148 }
3149 
emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,LiftoffRegister src)3150 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3151                                                           LiftoffRegister src) {
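  // There is no direct pairwise-extending add on x64; the macro assembler
  // lowers it to a multiply-add against a constant vector of ones, which is
  // what the scratch registers are used for.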
3152   I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), kScratchDoubleReg,
3153                             kScratchRegister);
3154 }
3155 
emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,LiftoffRegister src)3156 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3157                                                           LiftoffRegister src) {
3158   I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(), kScratchRegister);
3159 }
3160 
emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3161 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3162                                                      LiftoffRegister src1,
3163                                                      LiftoffRegister src2) {
3164   I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
3165                  /*is_signed=*/true);
3166 }
3167 
emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3168 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3169                                                      LiftoffRegister src1,
3170                                                      LiftoffRegister src2) {
3171   I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg,
3172                  /*is_signed=*/false);
3173 }
3174 
emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3175 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3176                                                       LiftoffRegister src1,
3177                                                       LiftoffRegister src2) {
3178   I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3179 }
3180 
emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3181 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3182                                                       LiftoffRegister src1,
3183                                                       LiftoffRegister src2) {
3184   I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3185 }
3186 
emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3187 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3188                                                 LiftoffRegister src1,
3189                                                 LiftoffRegister src2) {
3190   I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg);
3191 }
3192 
emit_i32x4_neg(LiftoffRegister dst,LiftoffRegister src)3193 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3194                                       LiftoffRegister src) {
3195   if (dst.fp() == src.fp()) {
3196     Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3197     Psignd(dst.fp(), kScratchDoubleReg);
3198   } else {
3199     Pxor(dst.fp(), dst.fp());
3200     Psubd(dst.fp(), src.fp());
3201   }
3202 }
3203 
emit_i32x4_alltrue(LiftoffRegister dst,LiftoffRegister src)3204 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3205                                           LiftoffRegister src) {
3206   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
3207 }
3208 
emit_i32x4_bitmask(LiftoffRegister dst,LiftoffRegister src)3209 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3210                                           LiftoffRegister src) {
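  // movmskps extracts the sign bit of each 32-bit lane into the low four bits
  // of the destination GPR, which matches i32x4.bitmask directly.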
3211   Movmskps(dst.gp(), src.fp());
3212 }
3213 
emit_i32x4_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3214 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3215                                       LiftoffRegister rhs) {
3216   liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
3217                                                                      lhs, rhs);
3218 }
3219 
emit_i32x4_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3220 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3221                                        int32_t rhs) {
3222   liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
3223       this, dst, lhs, rhs);
3224 }
3225 
emit_i32x4_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3226 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3227                                         LiftoffRegister lhs,
3228                                         LiftoffRegister rhs) {
3229   liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
3230                                                                      lhs, rhs);
3231 }
3232 
emit_i32x4_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3233 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3234                                          LiftoffRegister lhs, int32_t rhs) {
3235   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
3236       this, dst, lhs, rhs);
3237 }
3238 
emit_i32x4_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3239 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3240                                         LiftoffRegister lhs,
3241                                         LiftoffRegister rhs) {
3242   liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
3243                                                                      lhs, rhs);
3244 }
3245 
emit_i32x4_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3246 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3247                                          LiftoffRegister lhs, int32_t rhs) {
3248   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
3249       this, dst, lhs, rhs);
3250 }
3251 
emit_i32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3252 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3253                                       LiftoffRegister rhs) {
3254   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3255       this, dst, lhs, rhs);
3256 }
3257 
emit_i32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3258 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3259                                       LiftoffRegister rhs) {
3260   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3261       this, dst, lhs, rhs);
3262 }
3263 
emit_i32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3264 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3265                                       LiftoffRegister rhs) {
3266   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3267       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3268 }
3269 
emit_i32x4_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3270 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3271                                         LiftoffRegister lhs,
3272                                         LiftoffRegister rhs) {
3273   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3274       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3275 }
3276 
emit_i32x4_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3277 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3278                                         LiftoffRegister lhs,
3279                                         LiftoffRegister rhs) {
3280   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3281       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3282 }
3283 
emit_i32x4_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3284 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3285                                         LiftoffRegister lhs,
3286                                         LiftoffRegister rhs) {
3287   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
3288       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3289 }
3290 
emit_i32x4_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3291 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3292                                         LiftoffRegister lhs,
3293                                         LiftoffRegister rhs) {
3294   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3295       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3296 }
3297 
emit_i32x4_dot_i16x8_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3298 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3299                                               LiftoffRegister lhs,
3300                                               LiftoffRegister rhs) {
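  // pmaddwd multiplies corresponding signed 16-bit lanes and adds adjacent
  // pairs of the 32-bit products, which is exactly i32x4.dot_i16x8_s.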
3301   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
3302       this, dst, lhs, rhs);
3303 }
3304 
emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,LiftoffRegister src)3305 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3306                                                           LiftoffRegister src) {
3307   I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(), kScratchRegister);
3308 }
3309 
emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,LiftoffRegister src)3310 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3311                                                           LiftoffRegister src) {
3312   I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), kScratchDoubleReg);
3313 }
3314 
3315 namespace liftoff {
3316 // Helper function that checks for register aliasing and AVX support, and
3317 // moves registers around before calling the actual macro-assembler function.
I32x4ExtMulHelper(LiftoffAssembler * assm,XMMRegister dst,XMMRegister src1,XMMRegister src2,bool low,bool is_signed)3318 inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
3319                               XMMRegister src1, XMMRegister src2, bool low,
3320                               bool is_signed) {
3321   // I32x4ExtMul requires dst == src1 if AVX is not supported.
3322   if (CpuFeatures::IsSupported(AVX) || dst == src1) {
3323     assm->I32x4ExtMul(dst, src1, src2, kScratchDoubleReg, low, is_signed);
3324   } else if (dst != src2) {
3325     // dst != src1 && dst != src2
3326     assm->movaps(dst, src1);
3327     assm->I32x4ExtMul(dst, dst, src2, kScratchDoubleReg, low, is_signed);
3328   } else {
3329     // dst == src2
3330     // Extended multiplication is commutative, so swap the operands.
3331     assm->movaps(dst, src2);
3332     assm->I32x4ExtMul(dst, dst, src1, kScratchDoubleReg, low, is_signed);
3333   }
3334 }
3335 }  // namespace liftoff
3336 
emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3337 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
3338                                                      LiftoffRegister src1,
3339                                                      LiftoffRegister src2) {
3340   liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3341                              /*is_signed=*/true);
3342 }
3343 
emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3344 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
3345                                                      LiftoffRegister src1,
3346                                                      LiftoffRegister src2) {
3347   liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
3348                              /*is_signed=*/false);
3349 }
3350 
emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3351 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
3352                                                       LiftoffRegister src1,
3353                                                       LiftoffRegister src2) {
3354   liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3355                              /*low=*/false,
3356                              /*is_signed=*/true);
3357 }
3358 
emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3359 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
3360                                                       LiftoffRegister src1,
3361                                                       LiftoffRegister src2) {
3362   liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
3363                              /*low=*/false,
3364                              /*is_signed=*/false);
3365 }
3366 
emit_i64x2_neg(LiftoffRegister dst,LiftoffRegister src)3367 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
3368                                       LiftoffRegister src) {
3369   I64x2Neg(dst.fp(), src.fp(), kScratchDoubleReg);
3370 }
3371 
emit_i64x2_alltrue(LiftoffRegister dst,LiftoffRegister src)3372 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
3373                                           LiftoffRegister src) {
3374   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
3375 }
3376 
emit_i64x2_shl(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3377 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
3378                                       LiftoffRegister rhs) {
3379   liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
3380                                                                      lhs, rhs);
3381 }
3382 
emit_i64x2_shli(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3383 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
3384                                        int32_t rhs) {
3385   liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
3386       this, dst, lhs, rhs);
3387 }
3388 
emit_i64x2_shr_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3389 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
3390                                         LiftoffRegister lhs,
3391                                         LiftoffRegister rhs) {
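  // There is no packed 64-bit arithmetic right shift below AVX-512 (vpsraq),
  // so I64x2ShrS has to emulate it, using the extra scratch registers.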
3392   I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), kScratchDoubleReg,
3393             liftoff::kScratchDoubleReg2, kScratchRegister);
3394 }
3395 
emit_i64x2_shri_s(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3396 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
3397                                          LiftoffRegister lhs, int32_t rhs) {
3398   I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, kScratchDoubleReg);
3399 }
3400 
emit_i64x2_shr_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3401 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
3402                                         LiftoffRegister lhs,
3403                                         LiftoffRegister rhs) {
3404   liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
3405                                                                      lhs, rhs);
3406 }
3407 
emit_i64x2_shri_u(LiftoffRegister dst,LiftoffRegister lhs,int32_t rhs)3408 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
3409                                          LiftoffRegister lhs, int32_t rhs) {
3410   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
3411       this, dst, lhs, rhs);
3412 }
3413 
emit_i64x2_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3414 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
3415                                       LiftoffRegister rhs) {
3416   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
3417       this, dst, lhs, rhs);
3418 }
3419 
emit_i64x2_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3420 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
3421                                       LiftoffRegister rhs) {
3422   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
3423       this, dst, lhs, rhs);
3424 }
3425 
emit_i64x2_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3426 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
3427                                       LiftoffRegister rhs) {
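  // x64 has no packed 64-bit multiply below AVX-512, so I64x2Mul builds each
  // product from 32-bit partial products; the two temporaries are allocated
  // away from dst/lhs/rhs so that none of the inputs is clobbered early.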
3428   static constexpr RegClass tmp_rc = reg_class_for(kS128);
3429   LiftoffRegister tmp1 =
3430       GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
3431   LiftoffRegister tmp2 =
3432       GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
3433   I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
3434 }
3435 
emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3436 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
3437                                                      LiftoffRegister src1,
3438                                                      LiftoffRegister src2) {
3439   I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
3440               /*is_signed=*/true);
3441 }
3442 
emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3443 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
3444                                                      LiftoffRegister src1,
3445                                                      LiftoffRegister src2) {
3446   I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/true,
3447               /*is_signed=*/false);
3448 }
3449 
emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3450 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
3451                                                       LiftoffRegister src1,
3452                                                       LiftoffRegister src2) {
3453   I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
3454               /*is_signed=*/true);
3455 }
3456 
emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2)3457 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
3458                                                       LiftoffRegister src1,
3459                                                       LiftoffRegister src2) {
3460   I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), kScratchDoubleReg, /*low=*/false,
3461               /*is_signed=*/false);
3462 }
3463 
emit_i64x2_bitmask(LiftoffRegister dst,LiftoffRegister src)3464 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
3465                                           LiftoffRegister src) {
3466   Movmskpd(dst.gp(), src.fp());
3467 }
3468 
emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,LiftoffRegister src)3469 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
3470                                                      LiftoffRegister src) {
3471   Pmovsxdq(dst.fp(), src.fp());
3472 }
3473 
emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,LiftoffRegister src)3474 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
3475                                                       LiftoffRegister src) {
3476   I64x2SConvertI32x4High(dst.fp(), src.fp());
3477 }
3478 
emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,LiftoffRegister src)3479 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
3480                                                      LiftoffRegister src) {
3481   Pmovzxdq(dst.fp(), src.fp());
3482 }
3483 
emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,LiftoffRegister src)3484 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
3485                                                       LiftoffRegister src) {
3486   I64x2UConvertI32x4High(dst.fp(), src.fp(), kScratchDoubleReg);
3487 }
3488 
emit_f32x4_abs(LiftoffRegister dst,LiftoffRegister src)3489 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
3490                                       LiftoffRegister src) {
3491   Absps(dst.fp(), src.fp(), kScratchRegister);
3492 }
3493 
emit_f32x4_neg(LiftoffRegister dst,LiftoffRegister src)3494 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
3495                                       LiftoffRegister src) {
3496   Negps(dst.fp(), src.fp(), kScratchRegister);
3497 }
3498 
emit_f32x4_sqrt(LiftoffRegister dst,LiftoffRegister src)3499 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
3500                                        LiftoffRegister src) {
3501   Sqrtps(dst.fp(), src.fp());
3502 }
3503 
emit_f32x4_ceil(LiftoffRegister dst,LiftoffRegister src)3504 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
3505                                        LiftoffRegister src) {
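  // The bool return value reports whether the operation could be emitted
  // inline; if it were false, the compiler would fall back to a runtime call.
  // Callers are expected to have checked for SSE4_1 (required by roundps),
  // hence the DCHECK rather than a dynamic feature check.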
3506   DCHECK(CpuFeatures::IsSupported(SSE4_1));
3507   Roundps(dst.fp(), src.fp(), kRoundUp);
3508   return true;
3509 }
3510 
emit_f32x4_floor(LiftoffRegister dst,LiftoffRegister src)3511 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
3512                                         LiftoffRegister src) {
3513   DCHECK(CpuFeatures::IsSupported(SSE4_1));
3514   Roundps(dst.fp(), src.fp(), kRoundDown);
3515   return true;
3516 }
3517 
emit_f32x4_trunc(LiftoffRegister dst,LiftoffRegister src)3518 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
3519                                         LiftoffRegister src) {
3520   DCHECK(CpuFeatures::IsSupported(SSE4_1));
3521   Roundps(dst.fp(), src.fp(), kRoundToZero);
3522   return true;
3523 }
3524 
emit_f32x4_nearest_int(LiftoffRegister dst,LiftoffRegister src)3525 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
3526                                               LiftoffRegister src) {
3527   DCHECK(CpuFeatures::IsSupported(SSE4_1));
3528   Roundps(dst.fp(), src.fp(), kRoundToNearest);
3529   return true;
3530 }
3531 
emit_f32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3532 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3533                                       LiftoffRegister rhs) {
3534   liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
3535       this, dst, lhs, rhs);
3536 }
3537 
emit_f32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3538 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3539                                       LiftoffRegister rhs) {
3540   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
3541       this, dst, lhs, rhs);
3542 }
3543 
emit_f32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3544 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3545                                       LiftoffRegister rhs) {
3546   liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
3547       this, dst, lhs, rhs);
3548 }
3549 
emit_f32x4_div(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3550 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
3551                                       LiftoffRegister rhs) {
3552   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
3553       this, dst, lhs, rhs);
3554 }
3555 
emit_f32x4_min(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3556 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
3557                                       LiftoffRegister rhs) {
3558   F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3559 }
3560 
emit_f32x4_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3561 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
3562                                       LiftoffRegister rhs) {
3563   F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
3564 }
3565 
emit_f32x4_pmin(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3566 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
3567                                        LiftoffRegister rhs) {
3568   // Due to the way minps works, pmin(a, b) = minps(b, a).
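  // minps(x, y) returns y whenever x < y does not hold (including NaN and
  // ±0.0 ties), i.e. it computes x < y ? x : y; swapping the operands turns
  // this into wasm's pmin(a, b) = b < a ? b : a.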
3569   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
3570       this, dst, rhs, lhs);
3571 }
3572 
emit_f32x4_pmax(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3573 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
3574                                        LiftoffRegister rhs) {
3575   // Due to the way maxps works, pmax(a, b) = maxps(b, a).
3576   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
3577       this, dst, rhs, lhs);
3578 }
3579 
void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Abspd(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Negpd(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minpd works, pmin(a, b) = minpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Cvtdq2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  F64x2ConvertLowI32x4U(dst.fp(), src.fp(), kScratchRegister);
}

void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtps2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  I32x4SConvertF32x4(dst.fp(), src.fp(), kScratchDoubleReg, kScratchRegister);
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  // NAN->0, negative->0.
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmaxps(dst.fp(), src.fp(), kScratchDoubleReg);
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    maxps(dst.fp(), kScratchDoubleReg);
  }
  // scratch: float representation of max_signed.
  Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
  Psrld(kScratchDoubleReg, uint8_t{1});            // 0x7fffffff
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
  // scratch2: convert (src-max_signed).
  // Set positive overflow lanes to 0x7FFFFFFF.
  // Set negative lanes to 0.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubps(liftoff::kScratchDoubleReg2, dst.fp(), kScratchDoubleReg);
  } else {
    movaps(liftoff::kScratchDoubleReg2, dst.fp());
    subps(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  }
  Cmpleps(kScratchDoubleReg, liftoff::kScratchDoubleReg2);
  Cvttps2dq(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2);
  Pxor(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  Pxor(kScratchDoubleReg, kScratchDoubleReg);
  Pmaxsd(liftoff::kScratchDoubleReg2, kScratchDoubleReg);
  // Convert to int. Overflow lanes above max_signed will be 0x80000000.
  Cvttps2dq(dst.fp(), dst.fp());
  // Add (src-max_signed) for overflow lanes.
  Paddd(dst.fp(), liftoff::kScratchDoubleReg2);
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(kScratchDoubleReg, kScratchDoubleReg);           // Zeros.
  Pblendw(kScratchDoubleReg, src.fp(), uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), kScratchDoubleReg);
  }
  Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), byte{1});            // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());        // Convert hi, exactly.
  Addps(dst.fp(), dst.fp());           // Double hi, exactly.
  Addps(dst.fp(), kScratchDoubleReg);  // Add hi and lo, may round.
}

void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), kScratchDoubleReg,
                          kScratchRegister);
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
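  // andnps computes ~first & second, so swap the operands to get lhs & ~rhs.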
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
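  // Extract the byte (pextrb zero-extends), then sign-extend it to 32 bits.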
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxbl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
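  // Extract the 16-bit lane (pextrw zero-extends), then sign-extend it to
  // 32 bits.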
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsxwl(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrq(dst.gp(), lhs.fp(), static_cast<int8_t>(imm_lane_idx));
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrq(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrq(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
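  // insertps imm8 bits [5:4] select the destination lane; the source lane and
  // zero mask are 0.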
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
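  // Branch to the out-of-line code if rsp is at or below the stack limit
  // loaded from {limit_address}.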
  cmpq(rsp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0);
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    pushq(reg.gp());
    gp_regs.clear(reg);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
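    // xmm registers cannot be pushed directly; reserve stack space and spill
    // them with movdqu instead.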
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(rsp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(rsp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) addq(rsp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
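  // GP registers were pushed lowest-code-first, so pop them in reverse order.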
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    popq(reg.gp());
    gp_regs.clear(reg);
  }
}

void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  int spill_space_size = 0;
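  // Each spilled register occupies one pointer-sized slot; mark the slots that
  // hold tagged (ref) values so they are visited during stack scanning.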
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}

void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, Operand(rsp, arg_bytes), *args++, param_kind);
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  movq(arg_reg_1, rsp);

  constexpr int kNumCCallArgs = 1;

  // Now call the C function.
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = rax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, Operand(rsp, 0), out_argument_kind);
  }

  addq(rsp, Immediate(stack_bytes));
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  near_call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  near_jmp(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  if (target == no_reg) {
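    // The call target was passed on the stack; pop it into the scratch
    // register.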
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  if (target == no_reg) {
    popq(kScratchRegister);
    target = kScratchRegister;
  }
  jmp(target);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  near_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  movq(addr, rsp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  addq(rsp, Immediate(size));
}

void LiftoffAssembler::MaybeOSR() {
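  // If an on-stack-replacement target has been installed (the OSR slot is
  // non-zero), call the OSR runtime stub.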
  cmpq(liftoff::GetOSRTargetSlot(), Immediate(0));
  j(not_equal, static_cast<Address>(WasmCode::kWasmOnStackReplace),
    RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
  if (kind == kF32) {
    Ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    Ucomisd(src, src);
  }
  Label ret;
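  // ucomiss/ucomisd set the parity flag only for unordered (NaN) inputs, so
  // skip the store for ordered values.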
  j(parity_odd, &ret);
  movl(Operand(dst, 0), Immediate(1));
  bind(&ret);
}

void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
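  // cmpunordps/cmpunordpd set all bits of a lane iff it contains a NaN; fold
  // that mask into the flag at *dst.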
  pmovmskb(tmp_gp, tmp_s128.fp());
  orl(Operand(dst, 0), tmp_gp);
}

void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
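  // Slots are pushed from higher to lower stack offsets; the gap to the
  // previously handled slot determines how much padding to allocate first.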
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    DCHECK_LT(0, stack_decrement);
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        if (src.kind() == kI32) {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // Load i32 values to a register first to ensure they are zero
          // extended.
          asm_->movl(kScratchRegister, liftoff::GetStackSlot(slot.src_offset_));
          asm_->pushq(kScratchRegister);
        } else if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
          // Since offsets are subtracted from sp, we need a smaller offset to
          // push the top of a s128 value.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_ - 8));
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        } else {
          asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
          // For all other types, just push the whole (8-byte) stack slot.
          // This is also ok for f32 values (even though we copy 4 uninitialized
          // bytes), because f32 and f64 values are clearly distinguished in
          // Turbofan, so the uninitialized bytes are never accessed.
          asm_->pushq(liftoff::GetStackSlot(slot.src_offset_));
        }
        break;
      case LiftoffAssembler::VarState::kRegister: {
        int pushed = src.kind() == kS128 ? kSimd128Size : kSystemPointerSize;
        liftoff::push(asm_, src.reg(), src.kind(), stack_decrement - pushed);
        break;
      }
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->pushq(Immediate(src.i32_const()));
        break;
    }
  }
}

#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_X64_LIFTOFF_ASSEMBLER_X64_H_