// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_
#define V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/simd-shuffle.h"
#include "src/wasm/value-type.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

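// Bails out of the surrounding bool-returning emitter if the CPU feature
// {name} is not supported; otherwise enables the feature for the rest of the
// current scope.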
#define RETURN_FALSE_IF_MISSING_CPU_FEATURE(name)    \
  if (!CpuFeatures::IsSupported(name)) return false; \
  CpuFeatureScope feature(this, name);

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return equal;
    case kUnequal:
      return not_equal;
    case kSignedLessThan:
      return less;
    case kSignedLessEqual:
      return less_equal;
    case kSignedGreaterThan:
      return greater;
    case kSignedGreaterEqual:
      return greater_equal;
    case kUnsignedLessThan:
      return below;
    case kUnsignedLessEqual:
      return below_equal;
    case kUnsignedGreaterThan:
      return above;
    case kUnsignedGreaterEqual:
      return above_equal;
  }
}

// ebp-4 holds the stack marker, ebp-8 is the instance parameter.
constexpr int kInstanceOffset = 8;
constexpr int kFeedbackVectorOffset = 12;  // ebp-12 is the feedback vector.
constexpr int kTierupBudgetOffset = 16;    // ebp-16 is the tiering budget.

inline Operand GetStackSlot(int offset) { return Operand(ebp, -offset); }

inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
  int32_t half_offset =
      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
  return Operand(offset > 0 ? ebp : esp, -offset + half_offset);
}

// TODO(clemensb): Make this a constexpr variable once Operand is constexpr.
inline Operand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

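// General-purpose registers that can also be addressed as 8-bit registers
// (needed for byte loads/stores); ebx, which would also qualify, is the root
// register on ia32 and therefore not available.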
static constexpr LiftoffRegList kByteRegs =
    LiftoffRegList::FromBits<RegList{eax, ecx, edx}.bits()>();

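// Loads a value of the given {kind} from {base + offset} into {dst}. i64
// values live in a register pair and are loaded as two 32-bit words.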
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, Register base,
                 int32_t offset, ValueKind kind) {
  Operand src(base, offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->mov(dst.gp(), src);
      break;
    case kI64:
      assm->mov(dst.low_gp(), src);
      assm->mov(dst.high_gp(), Operand(base, offset + 4));
      break;
    case kF32:
      assm->movss(dst.fp(), src);
      break;
    case kF64:
      assm->movsd(dst.fp(), src);
      break;
    case kS128:
      assm->movdqu(dst.fp(), src);
      break;
    default:
      UNREACHABLE();
  }
}

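// Stores {src} of the given {kind} to {base + offset}; the mirror image of
// {Load} above.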
inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
                  LiftoffRegister src, ValueKind kind) {
  Operand dst(base, offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->mov(dst, src.gp());
      break;
    case kI64:
      assm->mov(dst, src.low_gp());
      assm->mov(Operand(base, offset + 4), src.high_gp());
      break;
    case kF32:
      assm->movss(dst, src.fp());
      break;
    case kF64:
      assm->movsd(dst, src.fp());
      break;
    case kS128:
      assm->movdqu(dst, src.fp());
      break;
    case kVoid:
    case kBottom:
    case kI8:
    case kI16:
      UNREACHABLE();
  }
}

inline void push(LiftoffAssembler* assm, LiftoffRegister reg, ValueKind kind,
                 int padding = 0) {
  switch (kind) {
    case kI32:
    case kRef:
    case kOptRef:
    case kRtt:
      assm->AllocateStackSpace(padding);
      assm->push(reg.gp());
      break;
    case kI64:
      assm->AllocateStackSpace(padding);
      assm->push(reg.high_gp());
      assm->push(reg.low_gp());
      break;
    case kF32:
      assm->AllocateStackSpace(sizeof(float) + padding);
      assm->movss(Operand(esp, 0), reg.fp());
      break;
    case kF64:
      assm->AllocateStackSpace(sizeof(double) + padding);
      assm->movsd(Operand(esp, 0), reg.fp());
      break;
    case kS128:
      assm->AllocateStackSpace(sizeof(double) * 2 + padding);
      assm->movdqu(Operand(esp, 0), reg.fp());
      break;
    case kVoid:
    case kBottom:
    case kI8:
    case kI16:
      UNREACHABLE();
  }
}

inline void SignExtendI32ToI64(Assembler* assm, LiftoffRegister reg) {
  assm->mov(reg.high_gp(), reg.low_gp());
  assm->sar(reg.high_gp(), 31);
}

// Get a temporary byte register, using {candidate} if possible.
// Might spill, but always keeps status flags intact.
inline Register GetTmpByteRegister(LiftoffAssembler* assm, Register candidate) {
  if (candidate.is_byte_register()) return candidate;
  // {GetUnusedRegister()} may insert move instructions to spill registers to
  // the stack. This is OK because {mov} does not change the status flags.
  return assm->GetUnusedRegister(liftoff::kByteRegs).gp();
}

inline void MoveStackValue(LiftoffAssembler* assm, const Operand& src,
                           const Operand& dst) {
  if (assm->cache_state()->has_unused_register(kGpReg)) {
    Register tmp = assm->cache_state()->unused_register(kGpReg).gp();
    assm->mov(tmp, src);
    assm->mov(dst, tmp);
  } else {
    // No free register, move via the stack.
    assm->push(src);
    assm->pop(dst);
  }
}

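// xmm7 serves as the scratch double register; it is assumed not to be part of
// Liftoff's allocatable FP cache registers, so clobbering it is safe.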
constexpr DoubleRegister kScratchDoubleReg = xmm7;

constexpr int kSubSpSize = 6;  // 6 bytes for "sub esp, <imm32>"

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  int offset = pc_offset();
  // Next we reserve the memory for the whole stack frame. We do not know yet
  // how big the stack frame will be so we just emit a placeholder instruction.
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately.
  sub_sp_32(0);
  DCHECK_EQ(liftoff::kSubSpSize, pc_offset() - offset);
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  // Push the return address and frame pointer to complete the stack frame.
  push(Operand(ebp, 4));
  push(Operand(ebp, 0));

  // Shift the whole frame upwards.
  Register scratch = eax;
  push(scratch);
  const int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count; i > 0; --i) {
    mov(scratch, Operand(esp, i * 4));
    mov(Operand(ebp, (i - stack_param_delta - 1) * 4), scratch);
  }
  pop(scratch);

  // Set the new stack and frame pointers.
  lea(esp, Operand(ebp, -stack_param_delta * 4));
  pop(ebp);
}

void LiftoffAssembler::AlignFrameSize() {}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
  DCHECK_EQ(0, frame_size % kSystemPointerSize);

  // We can't run out of space when patching; just pass anything big enough to
  // not cause the assembler to try to grow the buffer.
  constexpr int kAvailableSpace = 64;
  Assembler patching_assembler(
      AssemblerOptions{},
      ExternalAssemblerBuffer(buffer_start_ + offset, kAvailableSpace));

  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub_sp_32(frame_size);
    DCHECK_EQ(liftoff::kSubSpSize, patching_assembler.pc_offset());
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available stack
  // space if we first allocate the frame and then do the stack check (we will
  // need some remaining stack space for throwing the exception). That's why we
  // check the available stack space before we allocate the frame. To do this we
  // replace the {__ sub(sp, framesize)} with a jump to OOL code that does this
  // "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already finished
  // when {PatchPrepareStackFrame} is called. The function prologue then jumps
  // to the current {pc_offset()} to execute the OOL code for allocating the
  // large frame.

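  // After patching, the function prologue for a large frame looks like this:
  //   jmp <OOL code>   ; replaces the "sub esp, <imm32>" placeholder
  //   nop              ; padding up to {kSubSpSize}
  // and the OOL code emitted below performs the stack check, allocates the
  // frame, and jumps back to the instruction following the patched prologue.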
  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.jmp_rel(pc_offset() - offset);
  DCHECK_GE(liftoff::kSubSpSize, patching_assembler.pc_offset());
  patching_assembler.Nop(liftoff::kSubSpSize - patching_assembler.pc_offset());

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    // We do not have a scratch register, so pick any and push it first.
    Register stack_limit = eax;
    push(stack_limit);
    mov(stack_limit,
        FieldOperand(kWasmInstanceRegister,
                     WasmInstanceObject::kRealStackLimitAddressOffset));
    mov(stack_limit, Operand(stack_limit, 0));
    add(stack_limit, Immediate(frame_size));
    cmp(esp, stack_limit);
    pop(stack_limit);
    j(above_equal, &continuation, Label::kNear);
  }

  wasm_call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset = offset + liftoff::kSubSpSize;
  jmp_rel(func_start_offset - pc_offset());
}

void LiftoffAssembler::FinishCode() {}

void LiftoffAssembler::AbortCompilation() {}

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kTierupBudgetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  return value_kind_full_size(kind);
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      TurboAssembler::Move(reg.gp(), Immediate(value.to_i32(), rmode));
      break;
    case kI64: {
      DCHECK(RelocInfo::IsNoInfo(rmode));
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      TurboAssembler::Move(reg.low_gp(), Immediate(low_word));
      TurboAssembler::Move(reg.high_gp(), Immediate(high_word));
      break;
    }
    case kF32:
      TurboAssembler::Move(reg.fp(), value.to_f32_boxed().get_bits());
      break;
    case kF64:
      TurboAssembler::Move(reg.fp(), value.to_f64_boxed().get_bits());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  mov(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  Operand src{instance, offset};
  switch (size) {
    case 1:
      movzx_b(dst, src);
      break;
    case 4:
      mov(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
  mov(dst, Operand{instance, offset});
}

void LiftoffAssembler::SpillInstance(Register instance) {
  mov(liftoff::GetInstanceOperand(), instance);
}

void LiftoffAssembler::ResetOSRTarget() {}

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  DCHECK_GE(offset_imm, 0);
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Load(LiftoffRegister(dst), src_addr, offset_reg,
       static_cast<uint32_t>(offset_imm), LoadType::kI32Load, pinned);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  mov(dst, Operand(src_addr, offset_imm));
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  DCHECK_GE(offset_imm, 0);
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Operand dst_op = offset_reg == no_reg
                       ? Operand(dst_addr, offset_imm)
                       : Operand(dst_addr, offset_reg, times_1, offset_imm);
  mov(dst_op, src.gp());

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

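  // Emit the write barrier only if the destination page tracks outgoing
  // pointers, the stored value is a heap object (not a Smi), and the value's
  // page tracks incoming pointers; otherwise skip straight to {exit}.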
  Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, scratch,
                MemoryChunk::kPointersFromHereAreInterestingMask, not_zero,
                &write_barrier, Label::kNear);
  jmp(&exit, Label::kNear);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit, Label::kNear);
  CheckPageFlag(src.gp(), scratch,
                MemoryChunk::kPointersToHereAreInterestingMask, zero, &exit,
                Label::kNear);
  lea(scratch, dst_op);
  CallRecordWriteStubSaveRegisters(
      dst_addr, scratch, RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
  Operand src_op = offset_reg == no_reg
                       ? Operand(src_addr, offset_imm)
                       : Operand(src_addr, offset_reg, times_1, offset_imm);
  if (protected_load_pc) *protected_load_pc = pc_offset();

  switch (type.value()) {
    case LoadType::kI32Load8U:
      movzx_b(dst.gp(), src_op);
      break;
    case LoadType::kI32Load8S:
      movsx_b(dst.gp(), src_op);
      break;
    case LoadType::kI64Load8U:
      movzx_b(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load8S:
      movsx_b(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load16U:
      movzx_w(dst.gp(), src_op);
      break;
    case LoadType::kI32Load16S:
      movsx_w(dst.gp(), src_op);
      break;
    case LoadType::kI64Load16U:
      movzx_w(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load16S:
      movsx_w(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI32Load:
      mov(dst.gp(), src_op);
      break;
    case LoadType::kI64Load32U:
      mov(dst.low_gp(), src_op);
      xor_(dst.high_gp(), dst.high_gp());
      break;
    case LoadType::kI64Load32S:
      mov(dst.low_gp(), src_op);
      liftoff::SignExtendI32ToI64(this, dst);
      break;
    case LoadType::kI64Load: {
      // Compute the operand for the load of the upper half.
      Operand upper_src_op =
          offset_reg == no_reg
              ? Operand(src_addr, bit_cast<int32_t>(offset_imm + 4))
              : Operand(src_addr, offset_reg, times_1, offset_imm + 4);
      // The high word has to be mov'ed first, such that this is the protected
      // instruction. The mov of the low word cannot segfault.
      mov(dst.high_gp(), upper_src_op);
      mov(dst.low_gp(), src_op);
      break;
    }
    case LoadType::kF32Load:
      movss(dst.fp(), src_op);
      break;
    case LoadType::kF64Load:
      movsd(dst.fp(), src_op);
      break;
    case LoadType::kS128Load:
      movdqu(dst.fp(), src_op);
      break;
  }
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  DCHECK_EQ(type.value_type() == kWasmI64, src.is_gp_pair());
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand dst_op = offset_reg == no_reg
                       ? Operand(dst_addr, offset_imm)
                       : Operand(dst_addr, offset_reg, times_1, offset_imm);
  if (protected_store_pc) *protected_store_pc = pc_offset();

  switch (type.value()) {
    case StoreType::kI64Store8:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      // Only the lower 4 registers can be addressed as 8-bit registers.
      if (src.gp().is_byte_register()) {
        mov_b(dst_op, src.gp());
      } else {
        // We know that {src} is not a byte register, so the only pinned byte
        // registers (beside the outer {pinned}) are {dst_addr} and potentially
        // {offset_reg}.
        LiftoffRegList pinned_byte = pinned | LiftoffRegList{dst_addr};
        if (offset_reg != no_reg) pinned_byte.set(offset_reg);
        Register byte_src =
            GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned_byte)).gp();
        mov(byte_src, src.gp());
        mov_b(dst_op, byte_src);
      }
      break;
    case StoreType::kI64Store16:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      mov_w(dst_op, src.gp());
      break;
    case StoreType::kI64Store32:
      src = src.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      mov(dst_op, src.gp());
      break;
    case StoreType::kI64Store: {
      // Compute the operand for the store of the upper half.
      Operand upper_dst_op =
          offset_reg == no_reg
              ? Operand(dst_addr, bit_cast<int32_t>(offset_imm + 4))
              : Operand(dst_addr, offset_reg, times_1, offset_imm + 4);
      // The high word has to be mov'ed first, such that this is the protected
      // instruction. The mov of the low word cannot segfault.
      mov(upper_dst_op, src.high_gp());
      mov(dst_op, src.low_gp());
      break;
    }
    case StoreType::kF32Store:
      movss(dst_op, src.fp());
      break;
    case StoreType::kF64Store:
      movsd(dst_op, src.fp());
      break;
    case StoreType::kS128Store:
      Movdqu(dst_op, src.fp());
      break;
  }
}

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  if (type.value() != LoadType::kI64Load) {
    Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
    return;
  }

  DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand src_op = offset_reg == no_reg
                       ? Operand(src_addr, offset_imm)
                       : Operand(src_addr, offset_reg, times_1, offset_imm);

  movsd(liftoff::kScratchDoubleReg, src_op);
  Pextrd(dst.low().gp(), liftoff::kScratchDoubleReg, 0);
  Pextrd(dst.high().gp(), liftoff::kScratchDoubleReg, 1);
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  DCHECK_NE(offset_reg, no_reg);
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);

  // i64 store uses a totally different approach, hence implement it separately.
  if (type.value() == StoreType::kI64Store) {
    auto scratch2 = GetUnusedRegister(kFpReg, pinned).fp();
    movd(liftoff::kScratchDoubleReg, src.low().gp());
    movd(scratch2, src.high().gp());
    Punpckldq(liftoff::kScratchDoubleReg, scratch2);
    movsd(dst_op, liftoff::kScratchDoubleReg);
    // This lock+or is needed to achieve sequential consistency.
    lock();
    or_(Operand(esp, 0), Immediate(0));
    return;
  }

  // Other i64 stores actually only use the low word.
  if (src.is_pair()) src = src.low();
  Register src_gp = src.gp();

  bool is_byte_store = type.size() == 1;
  LiftoffRegList src_candidates =
      is_byte_store ? liftoff::kByteRegs : kGpCacheRegList;
  pinned = pinned | LiftoffRegList{dst_addr, src, offset_reg};

  // Ensure that {src} is a valid and otherwise unused register.
  if (!src_candidates.has(src) || cache_state_.is_used(src)) {
    // If there are no unused candidate registers, but {src} is a candidate,
    // then spill other uses of {src}. Otherwise spill any candidate register
    // and use that.
    LiftoffRegList unpinned_candidates = src_candidates.MaskOut(pinned);
    if (!cache_state_.has_unused_register(unpinned_candidates) &&
        src_candidates.has(src)) {
      SpillRegister(src);
    } else {
      Register safe_src = GetUnusedRegister(unpinned_candidates).gp();
      mov(safe_src, src_gp);
      src_gp = safe_src;
    }
  }

  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      xchg_b(src_gp, dst_op);
      return;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      xchg_w(src_gp, dst_op);
      return;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      xchg(src_gp, dst_op);
      return;
    default:
      UNREACHABLE();
  }
}

namespace liftoff {
#define __ lasm->

enum Binop { kAdd, kSub, kAnd, kOr, kXor, kExchange };

inline void AtomicAddOrSubOrExchange32(LiftoffAssembler* lasm, Binop binop,
                                       Register dst_addr, Register offset_reg,
                                       uint32_t offset_imm,
                                       LiftoffRegister value,
                                       LiftoffRegister result, StoreType type) {
  DCHECK_EQ(value, result);
  DCHECK(!__ cache_state()->is_used(result));
  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
  Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();

  bool is_byte_store = type.size() == 1;
  LiftoffRegList pinned = {dst_addr, value_reg, offset_reg};

  // Ensure that {value_reg} is a valid register.
  if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
    Register safe_value_reg =
        __ GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)).gp();
    __ mov(safe_value_reg, value_reg);
    value_reg = safe_value_reg;
  }

  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);
  if (binop == kSub) {
    __ neg(value_reg);
  }
  if (binop != kExchange) {
    __ lock();
  }
  switch (type.value()) {
    case StoreType::kI64Store8:
    case StoreType::kI32Store8:
      if (binop == kExchange) {
        __ xchg_b(value_reg, dst_op);
      } else {
        __ xadd_b(dst_op, value_reg);
      }
      __ movzx_b(result_reg, value_reg);
      break;
    case StoreType::kI64Store16:
    case StoreType::kI32Store16:
      if (binop == kExchange) {
        __ xchg_w(value_reg, dst_op);
      } else {
        __ xadd_w(dst_op, value_reg);
      }
      __ movzx_w(result_reg, value_reg);
      break;
    case StoreType::kI64Store32:
    case StoreType::kI32Store:
      if (binop == kExchange) {
        __ xchg(value_reg, dst_op);
      } else {
        __ xadd(dst_op, value_reg);
      }
      if (value_reg != result_reg) {
        __ mov(result_reg, value_reg);
      }
      break;
    default:
      UNREACHABLE();
  }
  if (is_64_bit_op) {
    __ xor_(result.high_gp(), result.high_gp());
  }
}

inline void AtomicBinop32(LiftoffAssembler* lasm, Binop op, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result,
                          StoreType type) {
  DCHECK_EQ(value, result);
  DCHECK(!__ cache_state()->is_used(result));
  bool is_64_bit_op = type.value_type() == kWasmI64;

  Register value_reg = is_64_bit_op ? value.low_gp() : value.gp();
  Register result_reg = is_64_bit_op ? result.low_gp() : result.gp();

  // The cmpxchg instruction uses eax to store the old value of the
  // compare-exchange primitive. Therefore we have to spill the register and
  // move any use to another register.
  __ ClearRegister(eax, {&dst_addr, &offset_reg, &value_reg},
                   LiftoffRegList{dst_addr, offset_reg, value_reg});

  bool is_byte_store = type.size() == 1;
  Register scratch = no_reg;
  if (is_byte_store) {
    // The scratch register has to be a byte register. As we are already tight
    // on registers, we just use the root register here.
    static_assert(!kLiftoffAssemblerGpCacheRegs.has(kRootRegister),
                  "root register is not Liftoff cache register");
    DCHECK(kRootRegister.is_byte_register());
    __ push(kRootRegister);
    scratch = kRootRegister;
  } else {
    scratch = __ GetUnusedRegister(
                  kGpReg, LiftoffRegList{dst_addr, offset_reg, value_reg, eax})
                  .gp();
  }

  Operand dst_op = Operand(dst_addr, offset_reg, times_1, offset_imm);

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ xor_(eax, eax);
      __ mov_b(eax, dst_op);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ xor_(eax, eax);
      __ mov_w(eax, dst_op);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      __ mov(eax, dst_op);
      break;
    }
    default:
      UNREACHABLE();
  }

  Label binop;
  __ bind(&binop);
  __ mov(scratch, eax);

  switch (op) {
    case kAnd: {
      __ and_(scratch, value_reg);
      break;
    }
    case kOr: {
      __ or_(scratch, value_reg);
      break;
    }
    case kXor: {
      __ xor_(scratch, value_reg);
      break;
    }
    default:
      UNREACHABLE();
  }

  __ lock();

  switch (type.value()) {
    case StoreType::kI32Store8:
    case StoreType::kI64Store8: {
      __ cmpxchg_b(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store16:
    case StoreType::kI64Store16: {
      __ cmpxchg_w(dst_op, scratch);
      break;
    }
    case StoreType::kI32Store:
    case StoreType::kI64Store32: {
      __ cmpxchg(dst_op, scratch);
      break;
    }
    default:
      UNREACHABLE();
  }
  __ j(not_equal, &binop);

  if (is_byte_store) {
    __ pop(kRootRegister);
  }
  if (result_reg != eax) {
    __ mov(result_reg, eax);
  }
  if (is_64_bit_op) {
    __ xor_(result.high_gp(), result.high_gp());
  }
}

inline void AtomicBinop64(LiftoffAssembler* lasm, Binop op, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result) {
  // We need {ebx} here, which is the root register. As the root register it
  // needs special treatment. As we use {ebx} directly in the code below, we
  // have to make sure here that the root register is actually {ebx}.
  static_assert(kRootRegister == ebx,
                "The following code assumes that kRootRegister == ebx");
  __ push(ebx);

  // Store the value on the stack, so that we can use it for retries.
  __ AllocateStackSpace(8);
  Operand value_op_hi = Operand(esp, 0);
  Operand value_op_lo = Operand(esp, 4);
  __ mov(value_op_lo, value.low_gp());
  __ mov(value_op_hi, value.high_gp());

  // We want to use the compare-exchange instruction here. It uses registers
  // as follows: old-value = EDX:EAX; new-value = ECX:EBX.
  Register old_hi = edx;
  Register old_lo = eax;
  Register new_hi = ecx;
  Register new_lo = ebx;
  // Base and offset need separate registers that do not alias with the
  // ones above.
  Register base = esi;
  Register offset = edi;

  // Swap base and offset register if necessary to avoid unnecessary
  // moves.
  if (dst_addr == offset || offset_reg == base) {
    std::swap(dst_addr, offset_reg);
  }
  // Spill all these registers if they are still holding other values.
  __ SpillRegisters(old_hi, old_lo, new_hi, base, offset);
  __ ParallelRegisterMove(
      {{LiftoffRegister::ForPair(base, offset),
        LiftoffRegister::ForPair(dst_addr, offset_reg), kI64}});

  Operand dst_op_lo = Operand(base, offset, times_1, offset_imm);
  Operand dst_op_hi = Operand(base, offset, times_1, offset_imm + 4);

  // Load the old value from memory.
  __ mov(old_lo, dst_op_lo);
  __ mov(old_hi, dst_op_hi);
  Label retry;
  __ bind(&retry);
  __ mov(new_lo, old_lo);
  __ mov(new_hi, old_hi);
  switch (op) {
    case kAdd:
      __ add(new_lo, value_op_lo);
      __ adc(new_hi, value_op_hi);
      break;
    case kSub:
      __ sub(new_lo, value_op_lo);
      __ sbb(new_hi, value_op_hi);
      break;
    case kAnd:
      __ and_(new_lo, value_op_lo);
      __ and_(new_hi, value_op_hi);
      break;
    case kOr:
      __ or_(new_lo, value_op_lo);
      __ or_(new_hi, value_op_hi);
      break;
    case kXor:
      __ xor_(new_lo, value_op_lo);
      __ xor_(new_hi, value_op_hi);
      break;
    case kExchange:
      __ mov(new_lo, value_op_lo);
      __ mov(new_hi, value_op_hi);
      break;
  }
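  // {cmpxchg8b} compares EDX:EAX against the 64-bit memory operand; on a
  // match it writes ECX:EBX to memory, otherwise it loads the current memory
  // value into EDX:EAX. ZF signals success, so retry until no other thread
  // intervened.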
  __ lock();
  __ cmpxchg8b(dst_op_lo);
  __ j(not_equal, &retry);

  // Deallocate the stack space again.
  __ add(esp, Immediate(8));
  // Restore the root register, and we are done.
  __ pop(kRootRegister);

  // Move the result into the correct registers.
  __ ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(old_lo, old_hi), kI64}});
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAdd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kAdd, dst_addr, offset_reg,
                                      offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kSub, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }
  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kSub, dst_addr, offset_reg,
                                      offset_imm, value, result, type);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kAnd, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kAnd, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
                           value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kOr, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kXor, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }

  liftoff::AtomicBinop32(this, liftoff::kXor, dst_addr, offset_reg, offset_imm,
                         value, result, type);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicBinop64(this, liftoff::kExchange, dst_addr, offset_reg,
                           offset_imm, value, result);
    return;
  }
  liftoff::AtomicAddOrSubOrExchange32(this, liftoff::kExchange, dst_addr,
                                      offset_reg, offset_imm, value, result,
                                      type);
}

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  // We expect that the offset has already been added to {dst_addr}, and no
  // {offset_reg} is provided. This is to save registers.
  DCHECK_EQ(offset_reg, no_reg);

  DCHECK_EQ(result, expected);

  if (type.value() != StoreType::kI64Store) {
    bool is_64_bit_op = type.value_type() == kWasmI64;

    Register value_reg = is_64_bit_op ? new_value.low_gp() : new_value.gp();
    Register expected_reg = is_64_bit_op ? expected.low_gp() : expected.gp();
    Register result_reg = expected_reg;

    // The cmpxchg instruction uses eax to store the old value of the
    // compare-exchange primitive. Therefore we have to spill the register and
    // move any use to another register.
    ClearRegister(eax, {&dst_addr, &value_reg},
                  LiftoffRegList{dst_addr, value_reg, expected_reg});
    if (expected_reg != eax) {
      mov(eax, expected_reg);
      expected_reg = eax;
    }

    bool is_byte_store = type.size() == 1;
    LiftoffRegList pinned = {dst_addr, value_reg, expected_reg};

    // Ensure that {value_reg} is a valid register.
    if (is_byte_store && !liftoff::kByteRegs.has(value_reg)) {
      Register safe_value_reg =
          pinned.set(GetUnusedRegister(liftoff::kByteRegs.MaskOut(pinned)))
              .gp();
      mov(safe_value_reg, value_reg);
      value_reg = safe_value_reg;
      pinned.clear(LiftoffRegister(value_reg));
    }

    Operand dst_op = Operand(dst_addr, offset_imm);

    lock();
    switch (type.value()) {
      case StoreType::kI32Store8:
      case StoreType::kI64Store8: {
        cmpxchg_b(dst_op, value_reg);
        movzx_b(result_reg, eax);
        break;
      }
      case StoreType::kI32Store16:
      case StoreType::kI64Store16: {
        cmpxchg_w(dst_op, value_reg);
        movzx_w(result_reg, eax);
        break;
      }
      case StoreType::kI32Store:
      case StoreType::kI64Store32: {
        cmpxchg(dst_op, value_reg);
        if (result_reg != eax) {
          mov(result_reg, eax);
        }
        break;
      }
      default:
        UNREACHABLE();
    }
    if (is_64_bit_op) {
      xor_(result.high_gp(), result.high_gp());
    }
    return;
  }

  // The following code handles kExprI64AtomicCompareExchange.

  // We need {ebx} here, which is the root register. As the root register it
  // needs special treatment. As we use {ebx} directly in the code below, we
  // have to make sure here that the root register is actually {ebx}.
  static_assert(kRootRegister == ebx,
                "The following code assumes that kRootRegister == ebx");
  push(kRootRegister);

  // The compare-exchange instruction uses registers as follows:
  // old-value = EDX:EAX; new-value = ECX:EBX.
  Register expected_hi = edx;
  Register expected_lo = eax;
  Register new_hi = ecx;
  Register new_lo = ebx;
  // The address needs a separate register that does not alias with the
  // ones above.
  Register address = esi;

  // Spill all these registers if they are still holding other values.
  SpillRegisters(expected_hi, expected_lo, new_hi, address);

  // We have to set new_lo specially, because it's the root register. We do it
  // before setting all other registers so that the original value does not get
  // overwritten.
  mov(new_lo, new_value.low_gp());

  // Move all other values into the right register.
  ParallelRegisterMove(
      {{LiftoffRegister(address), LiftoffRegister(dst_addr), kI32},
       {LiftoffRegister::ForPair(expected_lo, expected_hi), expected, kI64},
       {LiftoffRegister(new_hi), new_value.high(), kI32}});

  Operand dst_op = Operand(address, offset_imm);

  lock();
  cmpxchg8b(dst_op);

  // Restore the root register, and we are done.
  pop(kRootRegister);

  // Move the result into the correct registers.
  ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(expected_lo, expected_hi), kI64}});
}

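// {mfence} orders all preceding loads and stores before any subsequent memory
// operations, which is sufficient for a sequentially consistent fence.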
void LiftoffAssembler::AtomicFence() { mfence(); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  liftoff::Load(this, dst, ebp, kSystemPointerSize * (caller_slot_idx + 1),
                kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister reg, int offset,
                                           ValueKind kind) {
  liftoff::Load(this, reg, esp, offset, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  liftoff::Store(this, ebp, kSystemPointerSize * (caller_slot_idx + 1), src,
                 kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_EQ(0, SlotSizeForType(kind) % kSystemPointerSize);
  int words = SlotSizeForType(kind) / kSystemPointerSize;
  DCHECK_LE(1, words);
  // Make sure we move the words in the correct order in case there is an
  // overlap between src and dst.
  if (src_offset < dst_offset) {
    do {
      liftoff::MoveStackValue(this, liftoff::GetStackSlot(src_offset),
                              liftoff::GetStackSlot(dst_offset));
      dst_offset -= kSystemPointerSize;
      src_offset -= kSystemPointerSize;
    } while (--words);
  } else {
    while (words--) {
      liftoff::MoveStackValue(
          this, liftoff::GetStackSlot(src_offset - words * kSystemPointerSize),
          liftoff::GetStackSlot(dst_offset - words * kSystemPointerSize));
    }
  }
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  DCHECK(kI32 == kind || is_reference(kind));
  mov(dst, src);
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    movss(dst, src);
  } else if (kind == kF64) {
    movsd(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    Movaps(dst, src);
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      mov(dst, reg.gp());
      break;
    case kI64:
      mov(liftoff::GetHalfStackSlot(offset, kLowWord), reg.low_gp());
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), reg.high_gp());
      break;
    case kF32:
      movss(dst, reg.fp());
      break;
    case kF64:
      movsd(dst, reg.fp());
      break;
    case kS128:
      movdqu(dst, reg.fp());
      break;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  Operand dst = liftoff::GetStackSlot(offset);
  switch (value.type().kind()) {
    case kI32:
      mov(dst, Immediate(value.to_i32()));
      break;
    case kI64: {
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      mov(liftoff::GetHalfStackSlot(offset, kLowWord), Immediate(low_word));
      mov(liftoff::GetHalfStackSlot(offset, kHighWord), Immediate(high_word));
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
  liftoff::Load(this, reg, ebp, -offset, kind);
}

void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
  mov(reg, liftoff::GetHalfStackSlot(offset, half));
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  if (size <= 12) {
    // Special straight-line code for up to three words (6-9 bytes per word:
    // C7 <1-4 bytes operand> <4 bytes imm>, makes 18-27 bytes total).
    for (int offset = 4; offset <= size; offset += 4) {
      mov(liftoff::GetHalfStackSlot(start + offset, kLowWord), Immediate(0));
    }
  } else {
    // General case for bigger counts.
    // This sequence takes 19-22 bytes (3 for pushes, 3-6 for lea, 2 for xor, 5
    // for mov, 3 for repstosq, 3 for pops).
    // Note: rep_stos fills ECX doublewords at [EDI] with EAX.
    push(eax);
    push(ecx);
    push(edi);
    lea(edi, liftoff::GetStackSlot(start + size));
    xor_(eax, eax);
    // Size is in bytes, convert to doublewords (4-bytes).
    mov(ecx, Immediate(size / 4));
    rep_stos();
    pop(edi);
    pop(ecx);
    pop(eax);
  }
}

void LiftoffAssembler::emit_i32_add(Register dst, Register lhs, Register rhs) {
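  // {lea} computes {lhs + rhs} into {dst} without clobbering either input and
  // without touching the flags, so it avoids a separate {mov} when {dst} does
  // not alias {lhs}.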
  if (lhs != dst) {
    lea(dst, Operand(lhs, rhs, times_1, 0));
  } else {
    add(dst, rhs);
  }
}

void LiftoffAssembler::emit_i32_addi(Register dst, Register lhs, int32_t imm) {
  if (lhs != dst) {
    lea(dst, Operand(lhs, imm));
  } else {
    add(dst, Immediate(imm));
  }
}

void LiftoffAssembler::emit_i32_sub(Register dst, Register lhs, Register rhs) {
  if (dst != rhs) {
    // Default path.
    if (dst != lhs) mov(dst, lhs);
    sub(dst, rhs);
  } else if (lhs == rhs) {
    // Degenerate case.
    xor_(dst, dst);
  } else {
    // Emit {dst = lhs + -rhs} if dst == rhs.
    neg(dst);
    add(dst, lhs);
  }
}

void LiftoffAssembler::emit_i32_subi(Register dst, Register lhs, int32_t imm) {
  if (dst != lhs) {
    // We'll have to implement an UB-safe version if we need this corner case.
    DCHECK_NE(imm, kMinInt);
    lea(dst, Operand(lhs, -imm));
  } else {
    sub(dst, Immediate(imm));
  }
}

namespace liftoff {
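// For commutative operations the operands may be swapped: when {dst} aliases
// {rhs}, apply the operation with {lhs} directly instead of first moving
// {lhs} into {dst}.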
1329 template <void (Assembler::*op)(Register, Register)>
EmitCommutativeBinOp(LiftoffAssembler * assm,Register dst,Register lhs,Register rhs)1330 void EmitCommutativeBinOp(LiftoffAssembler* assm, Register dst, Register lhs,
1331                           Register rhs) {
1332   if (dst == rhs) {
1333     (assm->*op)(dst, lhs);
1334   } else {
1335     if (dst != lhs) assm->mov(dst, lhs);
1336     (assm->*op)(dst, rhs);
1337   }
1338 }
1339 
1340 template <void (Assembler::*op)(Register, int32_t)>
EmitCommutativeBinOpImm(LiftoffAssembler * assm,Register dst,Register lhs,int32_t imm)1341 void EmitCommutativeBinOpImm(LiftoffAssembler* assm, Register dst, Register lhs,
1342                              int32_t imm) {
1343   if (dst != lhs) assm->mov(dst, lhs);
1344   (assm->*op)(dst, imm);
1345 }
1346 }  // namespace liftoff
1347 
emit_i32_mul(Register dst,Register lhs,Register rhs)1348 void LiftoffAssembler::emit_i32_mul(Register dst, Register lhs, Register rhs) {
1349   liftoff::EmitCommutativeBinOp<&Assembler::imul>(this, dst, lhs, rhs);
1350 }
1351 
1352 namespace liftoff {
1353 enum class DivOrRem : uint8_t { kDiv, kRem };
1354 template <bool is_signed, DivOrRem div_or_rem>
EmitInt32DivOrRem(LiftoffAssembler * assm,Register dst,Register lhs,Register rhs,Label * trap_div_by_zero,Label * trap_div_unrepresentable)1355 void EmitInt32DivOrRem(LiftoffAssembler* assm, Register dst, Register lhs,
1356                        Register rhs, Label* trap_div_by_zero,
1357                        Label* trap_div_unrepresentable) {
1358   constexpr bool needs_unrepresentable_check =
1359       is_signed && div_or_rem == DivOrRem::kDiv;
1360   constexpr bool special_case_minus_1 =
1361       is_signed && div_or_rem == DivOrRem::kRem;
1362   DCHECK_EQ(needs_unrepresentable_check, trap_div_unrepresentable != nullptr);
1363 
1364   // For division, the lhs is always taken from {edx:eax}. Thus, make sure that
1365   // these registers are unused. If {rhs} is stored in one of them, move it to
1366   // another temporary register.
1367   // Do all this before any branch, such that the code is executed
1368   // unconditionally, as the cache state will also be modified unconditionally.
1369   assm->SpillRegisters(eax, edx);
1370   if (rhs == eax || rhs == edx) {
1371     LiftoffRegList unavailable{eax, edx, lhs};
1372     Register tmp = assm->GetUnusedRegister(kGpReg, unavailable).gp();
1373     assm->mov(tmp, rhs);
1374     rhs = tmp;
1375   }
1376 
1377   // Check for division by zero.
1378   assm->test(rhs, rhs);
1379   assm->j(zero, trap_div_by_zero);
1380 
1381   Label done;
1382   if (needs_unrepresentable_check) {
1383     // Check for {kMinInt / -1}. This is unrepresentable.
1384     Label do_div;
1385     assm->cmp(rhs, -1);
1386     assm->j(not_equal, &do_div);
1387     assm->cmp(lhs, kMinInt);
1388     assm->j(equal, trap_div_unrepresentable);
1389     assm->bind(&do_div);
1390   } else if (special_case_minus_1) {
1391     // {lhs % -1} is always 0 (needs to be special cased because {kMinInt / -1}
1392     // cannot be computed).
1393     Label do_rem;
1394     assm->cmp(rhs, -1);
1395     assm->j(not_equal, &do_rem);
1396     assm->xor_(dst, dst);
1397     assm->jmp(&done);
1398     assm->bind(&do_rem);
1399   }
1400 
1401   // Now move {lhs} into {eax}, then zero-extend or sign-extend into {edx}, then
1402   // do the division.
1403   if (lhs != eax) assm->mov(eax, lhs);
1404   if (is_signed) {
1405     assm->cdq();
1406     assm->idiv(rhs);
1407   } else {
1408     assm->xor_(edx, edx);
1409     assm->div(rhs);
1410   }
1411 
1412   // Move back the result (in {eax} or {edx}) into the {dst} register.
1413   constexpr Register kResultReg = div_or_rem == DivOrRem::kDiv ? eax : edx;
1414   if (dst != kResultReg) assm->mov(dst, kResultReg);
1415   if (special_case_minus_1) assm->bind(&done);
1416 }
1417 }  // namespace liftoff
1418 
1419 void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1420                                      Label* trap_div_by_zero,
1421                                      Label* trap_div_unrepresentable) {
1422   liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kDiv>(
1423       this, dst, lhs, rhs, trap_div_by_zero, trap_div_unrepresentable);
1424 }
1425 
1426 void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1427                                      Label* trap_div_by_zero) {
1428   liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kDiv>(
1429       this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1430 }
1431 
1432 void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1433                                      Label* trap_div_by_zero) {
1434   liftoff::EmitInt32DivOrRem<true, liftoff::DivOrRem::kRem>(
1435       this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1436 }
1437 
1438 void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1439                                      Label* trap_div_by_zero) {
1440   liftoff::EmitInt32DivOrRem<false, liftoff::DivOrRem::kRem>(
1441       this, dst, lhs, rhs, trap_div_by_zero, nullptr);
1442 }
1443 
1444 void LiftoffAssembler::emit_i32_and(Register dst, Register lhs, Register rhs) {
1445   liftoff::EmitCommutativeBinOp<&Assembler::and_>(this, dst, lhs, rhs);
1446 }
1447 
1448 void LiftoffAssembler::emit_i32_andi(Register dst, Register lhs, int32_t imm) {
1449   liftoff::EmitCommutativeBinOpImm<&Assembler::and_>(this, dst, lhs, imm);
1450 }
1451 
1452 void LiftoffAssembler::emit_i32_or(Register dst, Register lhs, Register rhs) {
1453   liftoff::EmitCommutativeBinOp<&Assembler::or_>(this, dst, lhs, rhs);
1454 }
1455 
1456 void LiftoffAssembler::emit_i32_ori(Register dst, Register lhs, int32_t imm) {
1457   liftoff::EmitCommutativeBinOpImm<&Assembler::or_>(this, dst, lhs, imm);
1458 }
1459 
1460 void LiftoffAssembler::emit_i32_xor(Register dst, Register lhs, Register rhs) {
1461   liftoff::EmitCommutativeBinOp<&Assembler::xor_>(this, dst, lhs, rhs);
1462 }
1463 
1464 void LiftoffAssembler::emit_i32_xori(Register dst, Register lhs, int32_t imm) {
1465   liftoff::EmitCommutativeBinOpImm<&Assembler::xor_>(this, dst, lhs, imm);
1466 }
1467 
1468 namespace liftoff {
1469 inline void EmitShiftOperation(LiftoffAssembler* assm, Register dst,
1470                                Register src, Register amount,
1471                                void (Assembler::*emit_shift)(Register)) {
1472   LiftoffRegList pinned = {dst, src, amount};
1473   // If dst is ecx, compute into a tmp register first, then move to ecx.
1474   if (dst == ecx) {
1475     Register tmp = assm->GetUnusedRegister(kGpReg, pinned).gp();
1476     assm->mov(tmp, src);
1477     if (amount != ecx) assm->mov(ecx, amount);
1478     (assm->*emit_shift)(tmp);
1479     assm->mov(ecx, tmp);
1480     return;
1481   }
1482 
1483   // Move amount into ecx. If ecx is in use, move its content to a tmp register
1484   // first. If src is ecx, src is now the tmp register.
1485   Register tmp_reg = no_reg;
1486   if (amount != ecx) {
1487     if (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
1488         pinned.has(LiftoffRegister(ecx))) {
1489       tmp_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
1490       assm->mov(tmp_reg, ecx);
1491       if (src == ecx) src = tmp_reg;
1492     }
1493     assm->mov(ecx, amount);
1494   }
1495 
1496   // Do the actual shift.
1497   if (dst != src) assm->mov(dst, src);
1498   (assm->*emit_shift)(dst);
1499 
1500   // Restore ecx if needed.
1501   if (tmp_reg.is_valid()) assm->mov(ecx, tmp_reg);
1502 }
1503 }  // namespace liftoff
1504 
1505 void LiftoffAssembler::emit_i32_shl(Register dst, Register src,
1506                                     Register amount) {
1507   liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shl_cl);
1508 }
1509 
1510 void LiftoffAssembler::emit_i32_shli(Register dst, Register src,
1511                                      int32_t amount) {
1512   if (dst != src) mov(dst, src);
1513   shl(dst, amount & 31);
1514 }
1515 
1516 void LiftoffAssembler::emit_i32_sar(Register dst, Register src,
1517                                     Register amount) {
1518   liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::sar_cl);
1519 }
1520 
1521 void LiftoffAssembler::emit_i32_sari(Register dst, Register src,
1522                                      int32_t amount) {
1523   if (dst != src) mov(dst, src);
1524   sar(dst, amount & 31);
1525 }
1526 
1527 void LiftoffAssembler::emit_i32_shr(Register dst, Register src,
1528                                     Register amount) {
1529   liftoff::EmitShiftOperation(this, dst, src, amount, &Assembler::shr_cl);
1530 }
1531 
1532 void LiftoffAssembler::emit_i32_shri(Register dst, Register src,
1533                                      int32_t amount) {
1534   if (dst != src) mov(dst, src);
1535   shr(dst, amount & 31);
1536 }
1537 
1538 void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1539   Lzcnt(dst, src);
1540 }
1541 
1542 void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1543   Tzcnt(dst, src);
1544 }
1545 
1546 bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1547   if (!CpuFeatures::IsSupported(POPCNT)) return false;
1548   CpuFeatureScope scope(this, POPCNT);
1549   popcnt(dst, src);
1550   return true;
1551 }
1552 
1553 namespace liftoff {
1554 template <void (Assembler::*op)(Register, Register),
1555           void (Assembler::*op_with_carry)(Register, Register)>
1556 inline void OpWithCarry(LiftoffAssembler* assm, LiftoffRegister dst,
1557                         LiftoffRegister lhs, LiftoffRegister rhs) {
1558   // First, compute the low half of the result, potentially into a temporary dst
1559   // register if {dst.low_gp()} equals {rhs.low_gp()} or any register we need to
1560   // keep alive for computing the upper half.
1561   LiftoffRegList keep_alive{lhs.high_gp(), rhs};
1562   Register dst_low = keep_alive.has(dst.low_gp())
1563                          ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1564                          : dst.low_gp();
1565 
1566   if (dst_low != lhs.low_gp()) assm->mov(dst_low, lhs.low_gp());
1567   (assm->*op)(dst_low, rhs.low_gp());
1568 
1569   // Now compute the upper half, while keeping alive the previous result.
1570   keep_alive = LiftoffRegList{dst_low, rhs.high_gp()};
1571   Register dst_high = keep_alive.has(dst.high_gp())
1572                           ? assm->GetUnusedRegister(kGpReg, keep_alive).gp()
1573                           : dst.high_gp();
1574 
1575   if (dst_high != lhs.high_gp()) assm->mov(dst_high, lhs.high_gp());
1576   (assm->*op_with_carry)(dst_high, rhs.high_gp());
1577 
1578   // If necessary, move result into the right registers.
1579   LiftoffRegister tmp_result = LiftoffRegister::ForPair(dst_low, dst_high);
1580   if (tmp_result != dst) assm->Move(dst, tmp_result, kI64);
1581 }
1582 
1583 template <void (Assembler::*op)(Register, const Immediate&),
1584           void (Assembler::*op_with_carry)(Register, int32_t)>
1585 inline void OpWithCarryI(LiftoffAssembler* assm, LiftoffRegister dst,
1586                          LiftoffRegister lhs, int64_t imm) {
1587   // The compiler allocated registers such that either {dst == lhs} or there is
1588   // no overlap between the two.
1589   DCHECK_NE(dst.low_gp(), lhs.high_gp());
1590 
1591   int32_t imm_low_word = static_cast<int32_t>(imm);
1592   int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
1593 
1594   // First, compute the low half of the result.
1595   if (dst.low_gp() != lhs.low_gp()) assm->mov(dst.low_gp(), lhs.low_gp());
1596   (assm->*op)(dst.low_gp(), Immediate(imm_low_word));
1597 
1598   // Now compute the upper half.
1599   if (dst.high_gp() != lhs.high_gp()) assm->mov(dst.high_gp(), lhs.high_gp());
1600   (assm->*op_with_carry)(dst.high_gp(), imm_high_word);
1601 }
1602 }  // namespace liftoff
1603 
1604 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1605                                     LiftoffRegister rhs) {
1606   liftoff::OpWithCarry<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1607 }
1608 
1609 void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1610                                      int64_t imm) {
1611   liftoff::OpWithCarryI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1612 }
1613 
1614 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1615                                     LiftoffRegister rhs) {
1616   liftoff::OpWithCarry<&Assembler::sub, &Assembler::sbb>(this, dst, lhs, rhs);
1617 }
1618 
1619 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1620                                     LiftoffRegister rhs) {
1621   // Idea:
1622   //        [           lhs_hi  |           lhs_lo  ] * [  rhs_hi  |  rhs_lo  ]
1623   //    =   [  lhs_hi * rhs_lo  |                   ]  (32 bit mul, shift 32)
1624   //      + [  lhs_lo * rhs_hi  |                   ]  (32 bit mul, shift 32)
1625   //      + [             lhs_lo * rhs_lo           ]  (32x32->64 mul, shift 0)
1626 
1627   // For simplicity, we move lhs and rhs into fixed registers.
1628   Register dst_hi = edx;
1629   Register dst_lo = eax;
1630   Register lhs_hi = ecx;
1631   Register lhs_lo = dst_lo;
1632   Register rhs_hi = dst_hi;
1633   Register rhs_lo = esi;
1634 
1635   // Spill all these registers if they are still holding other values.
1636   SpillRegisters(dst_hi, dst_lo, lhs_hi, rhs_lo);
1637 
1638   // Move lhs and rhs into the respective registers.
1639   ParallelRegisterMove({{LiftoffRegister::ForPair(lhs_lo, lhs_hi), lhs, kI64},
1640                         {LiftoffRegister::ForPair(rhs_lo, rhs_hi), rhs, kI64}});
1641 
1642   // First mul: lhs_hi' = lhs_hi * rhs_lo.
1643   imul(lhs_hi, rhs_lo);
1644   // Second mul: rhs_hi' = rhs_hi * lhs_lo.
1645   imul(rhs_hi, lhs_lo);
1646   // Add them: lhs_hi'' = lhs_hi' + rhs_hi' = lhs_hi * rhs_lo + rhs_hi * lhs_lo.
1647   add(lhs_hi, rhs_hi);
1648   // Third mul: edx:eax (dst_hi:dst_lo) = eax * esi (lhs_lo * rhs_lo).
1649   mul(rhs_lo);
1650   // Add lhs_hi'' to dst_hi.
1651   add(dst_hi, lhs_hi);
1652 
1653   // Finally, move back the temporary result to the actual dst register pair.
1654   LiftoffRegister dst_tmp = LiftoffRegister::ForPair(dst_lo, dst_hi);
1655   if (dst != dst_tmp) Move(dst, dst_tmp, kI64);
1656 }
1657 
1658 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1659                                      LiftoffRegister rhs,
1660                                      Label* trap_div_by_zero,
1661                                      Label* trap_div_unrepresentable) {
1662   return false;
1663 }
1664 
1665 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1666                                      LiftoffRegister rhs,
1667                                      Label* trap_div_by_zero) {
1668   return false;
1669 }
1670 
1671 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1672                                      LiftoffRegister rhs,
1673                                      Label* trap_div_by_zero) {
1674   return false;
1675 }
1676 
1677 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1678                                      LiftoffRegister rhs,
1679                                      Label* trap_div_by_zero) {
1680   return false;
1681 }
1682 
1683 namespace liftoff {
1684 inline bool PairContains(LiftoffRegister pair, Register reg) {
1685   return pair.low_gp() == reg || pair.high_gp() == reg;
1686 }
1687 
1688 inline LiftoffRegister ReplaceInPair(LiftoffRegister pair, Register old_reg,
1689                                      Register new_reg) {
1690   if (pair.low_gp() == old_reg) {
1691     return LiftoffRegister::ForPair(new_reg, pair.high_gp());
1692   }
1693   if (pair.high_gp() == old_reg) {
1694     return LiftoffRegister::ForPair(pair.low_gp(), new_reg);
1695   }
1696   return pair;
1697 }
1698 
1699 inline void Emit64BitShiftOperation(
1700     LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister src,
1701     Register amount, void (TurboAssembler::*emit_shift)(Register, Register)) {
1702   // Temporary registers cannot overlap with {dst}.
1703   LiftoffRegList pinned = {dst};
1704 
1705   constexpr size_t kMaxRegMoves = 3;
1706   base::SmallVector<LiftoffAssembler::ParallelRegisterMoveTuple, kMaxRegMoves>
1707       reg_moves;
1708 
1709   // If {dst} contains {ecx}, replace it by an unused register, which is then
1710   // moved to {ecx} in the end.
1711   Register ecx_replace = no_reg;
1712   if (PairContains(dst, ecx)) {
1713     ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1714     dst = ReplaceInPair(dst, ecx, ecx_replace);
1715     // If {amount} needs to be moved to {ecx}, but {ecx} is in use (and not part
1716     // of {dst}, hence overwritten anyway), move {ecx} to a tmp register and
1717     // restore it at the end.
1718   } else if (amount != ecx &&
1719              (assm->cache_state()->is_used(LiftoffRegister(ecx)) ||
1720               pinned.has(LiftoffRegister(ecx)))) {
1721     ecx_replace = assm->GetUnusedRegister(kGpReg, pinned).gp();
1722     reg_moves.emplace_back(ecx_replace, ecx, kI32);
1723   }
1724 
1725   reg_moves.emplace_back(dst, src, kI64);
1726   reg_moves.emplace_back(ecx, amount, kI32);
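  // The moves are emitted as one parallel move so that overlaps between {dst},
  // {src}, {amount} and {ecx} are resolved without clobbering each other.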
1727   assm->ParallelRegisterMove(base::VectorOf(reg_moves));
1728 
1729   // Do the actual shift.
1730   (assm->*emit_shift)(dst.high_gp(), dst.low_gp());
1731 
1732   // Restore {ecx} if needed.
1733   if (ecx_replace != no_reg) assm->mov(ecx, ecx_replace);
1734 }
1735 }  // namespace liftoff
1736 
1737 void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1738                                     Register amount) {
1739   liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1740                                    &TurboAssembler::ShlPair_cl);
1741 }
1742 
1743 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1744                                      int32_t amount) {
1745   amount &= 63;
1746   if (amount >= 32) {
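    // Shifts of 32 or more: the low word is shifted entirely into the high
    // word, and the low word of the result becomes zero.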
1747     if (dst.high_gp() != src.low_gp()) mov(dst.high_gp(), src.low_gp());
1748     if (amount != 32) shl(dst.high_gp(), amount - 32);
1749     xor_(dst.low_gp(), dst.low_gp());
1750   } else {
1751     if (dst != src) Move(dst, src, kI64);
1752     ShlPair(dst.high_gp(), dst.low_gp(), amount);
1753   }
1754 }
1755 
1756 void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1757                                     Register amount) {
1758   liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1759                                    &TurboAssembler::SarPair_cl);
1760 }
1761 
1762 void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1763                                      int32_t amount) {
1764   amount &= 63;
1765   if (amount >= 32) {
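    // Shifts of 32 or more: the result's low word is the high word shifted by
    // {amount - 32}, and the high word is filled with the sign bit.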
1766     if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1767     if (dst.high_gp() != src.high_gp()) mov(dst.high_gp(), src.high_gp());
1768     if (amount != 32) sar(dst.low_gp(), amount - 32);
1769     sar(dst.high_gp(), 31);
1770   } else {
1771     if (dst != src) Move(dst, src, kI64);
1772     SarPair(dst.high_gp(), dst.low_gp(), amount);
1773   }
1774 }
1775 void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1776                                     Register amount) {
1777   liftoff::Emit64BitShiftOperation(this, dst, src, amount,
1778                                    &TurboAssembler::ShrPair_cl);
1779 }
1780 
1781 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1782                                      int32_t amount) {
1783   amount &= 63;
1784   if (amount >= 32) {
1785     if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
1786     if (amount != 32) shr(dst.low_gp(), amount - 32);
1787     xor_(dst.high_gp(), dst.high_gp());
1788   } else {
1789     if (dst != src) Move(dst, src, kI64);
1790     ShrPair(dst.high_gp(), dst.low_gp(), amount);
1791   }
1792 }
1793 
1794 void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1795   // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
1796   Label done;
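  // Pick a result register that does not alias {src.low_gp()}, because the low
  // word is still read after the first count instruction has already written
  // the destination.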
1797   Register safe_dst = dst.low_gp();
1798   if (src.low_gp() == safe_dst) safe_dst = dst.high_gp();
1799   if (CpuFeatures::IsSupported(LZCNT)) {
1800     CpuFeatureScope scope(this, LZCNT);
1801     lzcnt(safe_dst, src.high_gp());  // Sets CF if high == 0.
1802     j(not_carry, &done, Label::kNear);
1803     lzcnt(safe_dst, src.low_gp());
1804     add(safe_dst, Immediate(32));  // 32 + CLZ32(low)
1805   } else {
1806     // CLZ32(x) =^ x == 0 ? 32 : 31 - BSR32(x)
1807     Label high_is_zero;
1808     bsr(safe_dst, src.high_gp());  // Sets ZF if high == 0.
1809     j(zero, &high_is_zero, Label::kNear);
1810     xor_(safe_dst, Immediate(31));  // for x in [0..31], 31^x == 31-x.
1811     jmp(&done, Label::kNear);
1812 
1813     bind(&high_is_zero);
1814     Label low_not_zero;
1815     bsr(safe_dst, src.low_gp());
1816     j(not_zero, &low_not_zero, Label::kNear);
1817     mov(safe_dst, Immediate(64 ^ 63));  // 64, after the xor below.
1818     bind(&low_not_zero);
1819     xor_(safe_dst, 63);  // for x in [0..31], 63^x == 63-x.
1820   }
1821 
1822   bind(&done);
1823   if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
1824   xor_(dst.high_gp(), dst.high_gp());  // High word of result is always 0.
1825 }
1826 
1827 void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1828   // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
1829   Label done;
1830   Register safe_dst = dst.low_gp();
1831   if (src.high_gp() == safe_dst) safe_dst = dst.high_gp();
1832   if (CpuFeatures::IsSupported(BMI1)) {
1833     CpuFeatureScope scope(this, BMI1);
1834     tzcnt(safe_dst, src.low_gp());  // Sets CF if low == 0.
1835     j(not_carry, &done, Label::kNear);
1836     tzcnt(safe_dst, src.high_gp());
1837     add(safe_dst, Immediate(32));  // 32 + CTZ32(high)
1838   } else {
1839     // CTZ32(x) =^ x == 0 ? 32 : BSF32(x)
1840     bsf(safe_dst, src.low_gp());  // Sets ZF if low == 0.
1841     j(not_zero, &done, Label::kNear);
1842 
1843     Label high_not_zero;
1844     bsf(safe_dst, src.high_gp());
1845     j(not_zero, &high_not_zero, Label::kNear);
1846     mov(safe_dst, 64);  // low == 0 and high == 0
1847     jmp(&done);
1848     bind(&high_not_zero);
1849     add(safe_dst, Immediate(32));  // 32 + CTZ32(high)
1850   }
1851 
1852   bind(&done);
1853   if (safe_dst != dst.low_gp()) mov(dst.low_gp(), safe_dst);
1854   xor_(dst.high_gp(), dst.high_gp());  // High word of result is always 0.
1855 }
1856 
1857 bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1858                                        LiftoffRegister src) {
1859   if (!CpuFeatures::IsSupported(POPCNT)) return false;
1860   CpuFeatureScope scope(this, POPCNT);
1861   // Produce partial popcnts in the two dst registers.
1862   Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
1863   Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
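  // If {src.high_gp()} aliases {dst.low_gp()}, count the high word first so it
  // is consumed before the first popcnt overwrites it.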
1864   popcnt(dst.low_gp(), src1);
1865   popcnt(dst.high_gp(), src2);
1866   // Add the two into the lower dst reg, clear the higher dst reg.
1867   add(dst.low_gp(), dst.high_gp());
1868   xor_(dst.high_gp(), dst.high_gp());
1869   return true;
1870 }
1871 
1872 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
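  // On ia32 a Smi stores its value shifted left by one (tag bit is 0), so
  // adding {Smi::FromInt(1)} to the field increments the untagged value by one.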
1873   add(Operand(dst.gp(), offset), Immediate(Smi::FromInt(1)));
1874 }
1875 
1876 void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
1877                                     DoubleRegister rhs) {
1878   if (CpuFeatures::IsSupported(AVX)) {
1879     CpuFeatureScope scope(this, AVX);
1880     vaddss(dst, lhs, rhs);
1881   } else if (dst == rhs) {
1882     addss(dst, lhs);
1883   } else {
1884     if (dst != lhs) movss(dst, lhs);
1885     addss(dst, rhs);
1886   }
1887 }
1888 
1889 void LiftoffAssembler::emit_f32_sub(DoubleRegister dst, DoubleRegister lhs,
1890                                     DoubleRegister rhs) {
1891   if (CpuFeatures::IsSupported(AVX)) {
1892     CpuFeatureScope scope(this, AVX);
1893     vsubss(dst, lhs, rhs);
1894   } else if (dst == rhs) {
1895     movss(liftoff::kScratchDoubleReg, rhs);
1896     movss(dst, lhs);
1897     subss(dst, liftoff::kScratchDoubleReg);
1898   } else {
1899     if (dst != lhs) movss(dst, lhs);
1900     subss(dst, rhs);
1901   }
1902 }
1903 
1904 void LiftoffAssembler::emit_f32_mul(DoubleRegister dst, DoubleRegister lhs,
1905                                     DoubleRegister rhs) {
1906   if (CpuFeatures::IsSupported(AVX)) {
1907     CpuFeatureScope scope(this, AVX);
1908     vmulss(dst, lhs, rhs);
1909   } else if (dst == rhs) {
1910     mulss(dst, lhs);
1911   } else {
1912     if (dst != lhs) movss(dst, lhs);
1913     mulss(dst, rhs);
1914   }
1915 }
1916 
1917 void LiftoffAssembler::emit_f32_div(DoubleRegister dst, DoubleRegister lhs,
1918                                     DoubleRegister rhs) {
1919   if (CpuFeatures::IsSupported(AVX)) {
1920     CpuFeatureScope scope(this, AVX);
1921     vdivss(dst, lhs, rhs);
1922   } else if (dst == rhs) {
1923     movss(liftoff::kScratchDoubleReg, rhs);
1924     movss(dst, lhs);
1925     divss(dst, liftoff::kScratchDoubleReg);
1926   } else {
1927     if (dst != lhs) movss(dst, lhs);
1928     divss(dst, rhs);
1929   }
1930 }
1931 
1932 namespace liftoff {
1933 enum class MinOrMax : uint8_t { kMin, kMax };
1934 template <typename type>
1935 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, DoubleRegister dst,
1936                               DoubleRegister lhs, DoubleRegister rhs,
1937                               MinOrMax min_or_max) {
1938   Label is_nan;
1939   Label lhs_below_rhs;
1940   Label lhs_above_rhs;
1941   Label done;
1942 
1943   // We need one tmp register to extract the sign bit. Get it right at the
1944   // beginning, such that the spilling code is not accidentally jumped over.
1945   Register tmp = assm->GetUnusedRegister(kGpReg, {}).gp();
1946 
1947 #define dop(name, ...)            \
1948   do {                            \
1949     if (sizeof(type) == 4) {      \
1950       assm->name##s(__VA_ARGS__); \
1951     } else {                      \
1952       assm->name##d(__VA_ARGS__); \
1953     }                             \
1954   } while (false)
1955 
1956   // Check the easy cases first: NaN (i.e. unordered), smaller, and greater.
1957   // NaN has to be checked first, because PF=1 implies CF=1.
1958   dop(ucomis, lhs, rhs);
1959   assm->j(parity_even, &is_nan, Label::kNear);   // PF=1
1960   assm->j(below, &lhs_below_rhs, Label::kNear);  // CF=1
1961   assm->j(above, &lhs_above_rhs, Label::kNear);  // CF=0 && ZF=0
1962 
1963   // If we get here, then either
1964   // a) {lhs == rhs},
1965   // b) {lhs == -0.0} and {rhs == 0.0}, or
1966   // c) {lhs == 0.0} and {rhs == -0.0}.
1967   // For a), it does not matter whether we return {lhs} or {rhs}. Check the sign
1968   // bit of {rhs} to differentiate b) and c).
1969   dop(movmskp, tmp, rhs);
1970   assm->test(tmp, Immediate(1));
1971   assm->j(zero, &lhs_below_rhs, Label::kNear);
1972   assm->jmp(&lhs_above_rhs, Label::kNear);
1973 
1974   assm->bind(&is_nan);
1975   // Create a NaN output.
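  // ({dst} is zeroed first, then 0.0 / 0.0 produces a quiet NaN.)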
1976   dop(xorp, dst, dst);
1977   dop(divs, dst, dst);
1978   assm->jmp(&done, Label::kNear);
1979 
1980   assm->bind(&lhs_below_rhs);
1981   DoubleRegister lhs_below_rhs_src = min_or_max == MinOrMax::kMin ? lhs : rhs;
1982   if (dst != lhs_below_rhs_src) dop(movs, dst, lhs_below_rhs_src);
1983   assm->jmp(&done, Label::kNear);
1984 
1985   assm->bind(&lhs_above_rhs);
1986   DoubleRegister lhs_above_rhs_src = min_or_max == MinOrMax::kMin ? rhs : lhs;
1987   if (dst != lhs_above_rhs_src) dop(movs, dst, lhs_above_rhs_src);
1988 
1989   assm->bind(&done);
1990 }
1991 }  // namespace liftoff
1992 
1993 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1994                                     DoubleRegister rhs) {
1995   liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
1996                                     liftoff::MinOrMax::kMin);
1997 }
1998 
1999 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
2000                                     DoubleRegister rhs) {
2001   liftoff::EmitFloatMinOrMax<float>(this, dst, lhs, rhs,
2002                                     liftoff::MinOrMax::kMax);
2003 }
2004 
2005 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
2006                                          DoubleRegister rhs) {
2007   static constexpr int kF32SignBit = 1 << 31;
2008   LiftoffRegList pinned;
2009   Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2010   Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2011   Movd(scratch, lhs);                      // move {lhs} into {scratch}.
2012   and_(scratch, Immediate(~kF32SignBit));  // clear sign bit in {scratch}.
2013   Movd(scratch2, rhs);                     // move {rhs} into {scratch2}.
2014   and_(scratch2, Immediate(kF32SignBit));  // isolate sign bit in {scratch2}.
2015   or_(scratch, scratch2);                  // combine {scratch2} into {scratch}.
2016   Movd(dst, scratch);                      // move result into {dst}.
2017 }
2018 
2019 void LiftoffAssembler::emit_f32_abs(DoubleRegister dst, DoubleRegister src) {
2020   static constexpr uint32_t kSignBit = uint32_t{1} << 31;
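  // {kSignBit - 1} has all bits set except the sign bit; and-ing with it
  // clears the sign and thus computes the absolute value.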
2021   if (dst == src) {
2022     TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2023     Andps(dst, liftoff::kScratchDoubleReg);
2024   } else {
2025     TurboAssembler::Move(dst, kSignBit - 1);
2026     Andps(dst, src);
2027   }
2028 }
2029 
2030 void LiftoffAssembler::emit_f32_neg(DoubleRegister dst, DoubleRegister src) {
2031   static constexpr uint32_t kSignBit = uint32_t{1} << 31;
2032   if (dst == src) {
2033     TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2034     Xorps(dst, liftoff::kScratchDoubleReg);
2035   } else {
2036     TurboAssembler::Move(dst, kSignBit);
2037     Xorps(dst, src);
2038   }
2039 }
2040 
2041 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
2042   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2043   roundss(dst, src, kRoundUp);
2044   return true;
2045 }
2046 
2047 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
2048   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2049   roundss(dst, src, kRoundDown);
2050   return true;
2051 }
2052 
2053 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
2054   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2055   roundss(dst, src, kRoundToZero);
2056   return true;
2057 }
2058 
2059 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
2060                                             DoubleRegister src) {
2061   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2062   roundss(dst, src, kRoundToNearest);
2063   return true;
2064 }
2065 
2066 void LiftoffAssembler::emit_f32_sqrt(DoubleRegister dst, DoubleRegister src) {
2067   Sqrtss(dst, src);
2068 }
2069 
2070 void LiftoffAssembler::emit_f64_add(DoubleRegister dst, DoubleRegister lhs,
2071                                     DoubleRegister rhs) {
2072   if (CpuFeatures::IsSupported(AVX)) {
2073     CpuFeatureScope scope(this, AVX);
2074     vaddsd(dst, lhs, rhs);
2075   } else if (dst == rhs) {
2076     addsd(dst, lhs);
2077   } else {
2078     if (dst != lhs) movsd(dst, lhs);
2079     addsd(dst, rhs);
2080   }
2081 }
2082 
2083 void LiftoffAssembler::emit_f64_sub(DoubleRegister dst, DoubleRegister lhs,
2084                                     DoubleRegister rhs) {
2085   if (CpuFeatures::IsSupported(AVX)) {
2086     CpuFeatureScope scope(this, AVX);
2087     vsubsd(dst, lhs, rhs);
2088   } else if (dst == rhs) {
2089     movsd(liftoff::kScratchDoubleReg, rhs);
2090     movsd(dst, lhs);
2091     subsd(dst, liftoff::kScratchDoubleReg);
2092   } else {
2093     if (dst != lhs) movsd(dst, lhs);
2094     subsd(dst, rhs);
2095   }
2096 }
2097 
2098 void LiftoffAssembler::emit_f64_mul(DoubleRegister dst, DoubleRegister lhs,
2099                                     DoubleRegister rhs) {
2100   if (CpuFeatures::IsSupported(AVX)) {
2101     CpuFeatureScope scope(this, AVX);
2102     vmulsd(dst, lhs, rhs);
2103   } else if (dst == rhs) {
2104     mulsd(dst, lhs);
2105   } else {
2106     if (dst != lhs) movsd(dst, lhs);
2107     mulsd(dst, rhs);
2108   }
2109 }
2110 
2111 void LiftoffAssembler::emit_f64_div(DoubleRegister dst, DoubleRegister lhs,
2112                                     DoubleRegister rhs) {
2113   if (CpuFeatures::IsSupported(AVX)) {
2114     CpuFeatureScope scope(this, AVX);
2115     vdivsd(dst, lhs, rhs);
2116   } else if (dst == rhs) {
2117     movsd(liftoff::kScratchDoubleReg, rhs);
2118     movsd(dst, lhs);
2119     divsd(dst, liftoff::kScratchDoubleReg);
2120   } else {
2121     if (dst != lhs) movsd(dst, lhs);
2122     divsd(dst, rhs);
2123   }
2124 }
2125 
2126 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
2127                                     DoubleRegister rhs) {
2128   liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2129                                      liftoff::MinOrMax::kMin);
2130 }
2131 
2132 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
2133                                          DoubleRegister rhs) {
2134   static constexpr int kF32SignBit = 1 << 31;
2135   // On ia32, we cannot hold the whole f64 value in a gp register, so we just
2136   // operate on the upper half (UH).
2137   LiftoffRegList pinned;
2138   Register scratch = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
2139   Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
2140 
2141   Pextrd(scratch, lhs, 1);                 // move UH of {lhs} into {scratch}.
2142   and_(scratch, Immediate(~kF32SignBit));  // clear sign bit in {scratch}.
2143   Pextrd(scratch2, rhs, 1);                // move UH of {rhs} into {scratch2}.
2144   and_(scratch2, Immediate(kF32SignBit));  // isolate sign bit in {scratch2}.
2145   or_(scratch, scratch2);                  // combine {scratch2} into {scratch}.
2146   movsd(dst, lhs);                         // move {lhs} into {dst}.
2147   Pinsrd(dst, scratch, 1);                 // insert {scratch} into UH of {dst}.
2148 }
2149 
2150 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
2151                                     DoubleRegister rhs) {
2152   liftoff::EmitFloatMinOrMax<double>(this, dst, lhs, rhs,
2153                                      liftoff::MinOrMax::kMax);
2154 }
2155 
2156 void LiftoffAssembler::emit_f64_abs(DoubleRegister dst, DoubleRegister src) {
2157   static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2158   if (dst == src) {
2159     TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit - 1);
2160     Andpd(dst, liftoff::kScratchDoubleReg);
2161   } else {
2162     TurboAssembler::Move(dst, kSignBit - 1);
2163     Andpd(dst, src);
2164   }
2165 }
2166 
2167 void LiftoffAssembler::emit_f64_neg(DoubleRegister dst, DoubleRegister src) {
2168   static constexpr uint64_t kSignBit = uint64_t{1} << 63;
2169   if (dst == src) {
2170     TurboAssembler::Move(liftoff::kScratchDoubleReg, kSignBit);
2171     Xorpd(dst, liftoff::kScratchDoubleReg);
2172   } else {
2173     TurboAssembler::Move(dst, kSignBit);
2174     Xorpd(dst, src);
2175   }
2176 }
2177 
2178 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
2179   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2180   roundsd(dst, src, kRoundUp);
2181   return true;
2182 }
2183 
2184 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
2185   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2186   roundsd(dst, src, kRoundDown);
2187   return true;
2188 }
2189 
2190 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
2191   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2192   roundsd(dst, src, kRoundToZero);
2193   return true;
2194 }
2195 
2196 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
2197                                             DoubleRegister src) {
2198   RETURN_FALSE_IF_MISSING_CPU_FEATURE(SSE4_1);
2199   roundsd(dst, src, kRoundToNearest);
2200   return true;
2201 }
2202 
2203 void LiftoffAssembler::emit_f64_sqrt(DoubleRegister dst, DoubleRegister src) {
2204   Sqrtsd(dst, src);
2205 }
2206 
2207 namespace liftoff {
2208 #define __ assm->
2209 // Used for float to int conversions. If the value in {converted_back} equals
2210 // {src} afterwards, the conversion succeeded.
2211 template <typename dst_type, typename src_type>
2212 inline void ConvertFloatToIntAndBack(LiftoffAssembler* assm, Register dst,
2213                                      DoubleRegister src,
2214                                      DoubleRegister converted_back,
2215                                      LiftoffRegList pinned) {
2216   if (std::is_same<double, src_type>::value) {  // f64
2217     if (std::is_signed<dst_type>::value) {      // f64 -> i32
2218       __ cvttsd2si(dst, src);
2219       __ Cvtsi2sd(converted_back, dst);
2220     } else {  // f64 -> u32
2221       __ Cvttsd2ui(dst, src, liftoff::kScratchDoubleReg);
2222       __ Cvtui2sd(converted_back, dst,
2223                   __ GetUnusedRegister(kGpReg, pinned).gp());
2224     }
2225   } else {                                  // f32
2226     if (std::is_signed<dst_type>::value) {  // f32 -> i32
2227       __ cvttss2si(dst, src);
2228       __ Cvtsi2ss(converted_back, dst);
2229     } else {  // f32 -> u32
2230       __ Cvttss2ui(dst, src, liftoff::kScratchDoubleReg);
2231       __ Cvtui2ss(converted_back, dst,
2232                   __ GetUnusedRegister(kGpReg, pinned).gp());
2233     }
2234   }
2235 }
2236 
2237 template <typename dst_type, typename src_type>
2238 inline bool EmitTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2239                                    DoubleRegister src, Label* trap) {
2240   if (!CpuFeatures::IsSupported(SSE4_1)) {
2241     __ bailout(kMissingCPUFeature, "no SSE4.1");
2242     return true;
2243   }
2244   CpuFeatureScope feature(assm, SSE4_1);
2245 
2246   LiftoffRegList pinned = {src, dst};
2247   DoubleRegister rounded =
2248       pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2249   DoubleRegister converted_back =
2250       pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2251 
2252   if (std::is_same<double, src_type>::value) {  // f64
2253     __ roundsd(rounded, src, kRoundToZero);
2254   } else {  // f32
2255     __ roundss(rounded, src, kRoundToZero);
2256   }
2257   ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2258                                                converted_back, pinned);
2259   if (std::is_same<double, src_type>::value) {  // f64
2260     __ ucomisd(converted_back, rounded);
2261   } else {  // f32
2262     __ ucomiss(converted_back, rounded);
2263   }
2264 
2265   // Jump to trap if PF is 1 (one of the operands was NaN) or if they are not
2266   // equal.
2267   __ j(parity_even, trap);
2268   __ j(not_equal, trap);
2269   return true;
2270 }
2271 
2272 template <typename dst_type, typename src_type>
2273 inline bool EmitSatTruncateFloatToInt(LiftoffAssembler* assm, Register dst,
2274                                       DoubleRegister src) {
2275   if (!CpuFeatures::IsSupported(SSE4_1)) {
2276     __ bailout(kMissingCPUFeature, "no SSE4.1");
2277     return true;
2278   }
2279   CpuFeatureScope feature(assm, SSE4_1);
2280 
2281   Label done;
2282   Label not_nan;
2283   Label src_positive;
2284 
2285   LiftoffRegList pinned = {src, dst};
2286   DoubleRegister rounded =
2287       pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2288   DoubleRegister converted_back =
2289       pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2290   DoubleRegister zero_reg =
2291       pinned.set(__ GetUnusedRegister(kFpReg, pinned)).fp();
2292 
2293   if (std::is_same<double, src_type>::value) {  // f64
2294     __ roundsd(rounded, src, kRoundToZero);
2295   } else {  // f32
2296     __ roundss(rounded, src, kRoundToZero);
2297   }
2298 
2299   ConvertFloatToIntAndBack<dst_type, src_type>(assm, dst, rounded,
2300                                                converted_back, pinned);
2301   if (std::is_same<double, src_type>::value) {  // f64
2302     __ ucomisd(converted_back, rounded);
2303   } else {  // f32
2304     __ ucomiss(converted_back, rounded);
2305   }
2306 
2307   // Return 0 if PF is 1 (one of the operands was NaN).
2308   __ j(parity_odd, &not_nan);
2309   __ xor_(dst, dst);
2310   __ jmp(&done);
2311 
2312   __ bind(&not_nan);
2313   // If rounding is as expected, return result
2314   __ j(equal, &done);
2315 
2316   __ Xorpd(zero_reg, zero_reg);
2317 
2318   // If out of bounds, check whether {src} is positive.
2319   if (std::is_same<double, src_type>::value) {  // f64
2320     __ ucomisd(src, zero_reg);
2321   } else {  // f32
2322     __ ucomiss(src, zero_reg);
2323   }
2324   __ j(above, &src_positive);
2325   __ mov(dst, Immediate(std::numeric_limits<dst_type>::min()));
2326   __ jmp(&done);
2327 
2328   __ bind(&src_positive);
2329 
2330   __ mov(dst, Immediate(std::numeric_limits<dst_type>::max()));
2331 
2332   __ bind(&done);
2333   return true;
2334 }
2335 #undef __
2336 }  // namespace liftoff
2337 
2338 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2339                                             LiftoffRegister dst,
2340                                             LiftoffRegister src, Label* trap) {
2341   switch (opcode) {
2342     case kExprI32ConvertI64:
2343       if (dst.gp() != src.low_gp()) mov(dst.gp(), src.low_gp());
2344       return true;
2345     case kExprI32SConvertF32:
2346       return liftoff::EmitTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2347                                                              src.fp(), trap);
2348     case kExprI32UConvertF32:
2349       return liftoff::EmitTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2350                                                               src.fp(), trap);
2351     case kExprI32SConvertF64:
2352       return liftoff::EmitTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2353                                                               src.fp(), trap);
2354     case kExprI32UConvertF64:
2355       return liftoff::EmitTruncateFloatToInt<uint32_t, double>(this, dst.gp(),
2356                                                                src.fp(), trap);
2357     case kExprI32SConvertSatF32:
2358       return liftoff::EmitSatTruncateFloatToInt<int32_t, float>(this, dst.gp(),
2359                                                                 src.fp());
2360     case kExprI32UConvertSatF32:
2361       return liftoff::EmitSatTruncateFloatToInt<uint32_t, float>(this, dst.gp(),
2362                                                                  src.fp());
2363     case kExprI32SConvertSatF64:
2364       return liftoff::EmitSatTruncateFloatToInt<int32_t, double>(this, dst.gp(),
2365                                                                  src.fp());
2366     case kExprI32UConvertSatF64:
2367       return liftoff::EmitSatTruncateFloatToInt<uint32_t, double>(
2368           this, dst.gp(), src.fp());
2369     case kExprI32ReinterpretF32:
2370       Movd(dst.gp(), src.fp());
2371       return true;
2372     case kExprI64SConvertI32:
2373       if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2374       if (dst.high_gp() != src.gp()) mov(dst.high_gp(), src.gp());
2375       sar(dst.high_gp(), 31);
2376       return true;
2377     case kExprI64UConvertI32:
2378       if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2379       xor_(dst.high_gp(), dst.high_gp());
2380       return true;
2381     case kExprI64ReinterpretF64:
2382       // Push src to the stack.
2383       AllocateStackSpace(8);
2384       movsd(Operand(esp, 0), src.fp());
2385       // Pop to dst.
2386       pop(dst.low_gp());
2387       pop(dst.high_gp());
2388       return true;
2389     case kExprF32SConvertI32:
2390       cvtsi2ss(dst.fp(), src.gp());
2391       return true;
2392     case kExprF32UConvertI32: {
2393       LiftoffRegList pinned = {dst, src};
2394       Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2395       Cvtui2ss(dst.fp(), src.gp(), scratch);
2396       return true;
2397     }
2398     case kExprF32ConvertF64:
2399       cvtsd2ss(dst.fp(), src.fp());
2400       return true;
2401     case kExprF32ReinterpretI32:
2402       Movd(dst.fp(), src.gp());
2403       return true;
2404     case kExprF64SConvertI32:
2405       Cvtsi2sd(dst.fp(), src.gp());
2406       return true;
2407     case kExprF64UConvertI32: {
2408       LiftoffRegList pinned = {dst, src};
2409       Register scratch = GetUnusedRegister(kGpReg, pinned).gp();
2410       Cvtui2sd(dst.fp(), src.gp(), scratch);
2411       return true;
2412     }
2413     case kExprF64ConvertF32:
2414       cvtss2sd(dst.fp(), src.fp());
2415       return true;
2416     case kExprF64ReinterpretI64:
2417       // Push src to the stack.
2418       push(src.high_gp());
2419       push(src.low_gp());
2420       // Pop to dst.
2421       movsd(dst.fp(), Operand(esp, 0));
2422       add(esp, Immediate(8));
2423       return true;
2424     default:
2425       return false;
2426   }
2427 }
2428 
2429 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2430   Register byte_reg = liftoff::GetTmpByteRegister(this, src);
2431   if (byte_reg != src) mov(byte_reg, src);
2432   movsx_b(dst, byte_reg);
2433 }
2434 
2435 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2436   movsx_w(dst, src);
2437 }
2438 
2439 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2440                                               LiftoffRegister src) {
2441   Register byte_reg = liftoff::GetTmpByteRegister(this, src.low_gp());
2442   if (byte_reg != src.low_gp()) mov(byte_reg, src.low_gp());
2443   movsx_b(dst.low_gp(), byte_reg);
2444   liftoff::SignExtendI32ToI64(this, dst);
2445 }
2446 
2447 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2448                                                LiftoffRegister src) {
2449   movsx_w(dst.low_gp(), src.low_gp());
2450   liftoff::SignExtendI32ToI64(this, dst);
2451 }
2452 
2453 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2454                                                LiftoffRegister src) {
2455   if (dst.low_gp() != src.low_gp()) mov(dst.low_gp(), src.low_gp());
2456   liftoff::SignExtendI32ToI64(this, dst);
2457 }
2458 
2459 void LiftoffAssembler::emit_jump(Label* label) { jmp(label); }
2460 
2461 void LiftoffAssembler::emit_jump(Register target) { jmp(target); }
2462 
2463 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2464                                       Label* label, ValueKind kind,
2465                                       Register lhs, Register rhs) {
2466   Condition cond = liftoff::ToCondition(liftoff_cond);
2467   if (rhs != no_reg) {
2468     switch (kind) {
2469       case kRef:
2470       case kOptRef:
2471       case kRtt:
2472         DCHECK(liftoff_cond == kEqual || liftoff_cond == kUnequal);
2473         V8_FALLTHROUGH;
2474       case kI32:
2475         cmp(lhs, rhs);
2476         break;
2477       default:
2478         UNREACHABLE();
2479     }
2480   } else {
2481     DCHECK_EQ(kind, kI32);
2482     test(lhs, lhs);
2483   }
2484 
2485   j(cond, label);
2486 }
2487 
2488 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2489                                            Label* label, Register lhs,
2490                                            int imm) {
2491   Condition cond = liftoff::ToCondition(liftoff_cond);
2492   cmp(lhs, Immediate(imm));
2493   j(cond, label);
2494 }
2495 
2496 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2497                                                    int subtrahend,
2498                                                    Label* result_negative) {
2499   sub(value, Immediate(subtrahend));
2500   j(negative, result_negative);
2501 }
2502 
2503 namespace liftoff {
2504 
2505 // Setcc into dst register, given a scratch byte register (might be the same as
2506 // dst). Never spills.
2507 inline void setcc_32_no_spill(LiftoffAssembler* assm, Condition cond,
2508                               Register dst, Register tmp_byte_reg) {
2509   assm->setcc(cond, tmp_byte_reg);
2510   assm->movzx_b(dst, tmp_byte_reg);
2511 }
2512 
2513 // Setcc into dst register (no constraints). Might spill.
2514 inline void setcc_32(LiftoffAssembler* assm, Condition cond, Register dst) {
2515   Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2516   setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2517 }
2518 
2519 }  // namespace liftoff
2520 
2521 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2522   test(src, src);
2523   liftoff::setcc_32(this, equal, dst);
2524 }
2525 
2526 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2527                                          Register dst, Register lhs,
2528                                          Register rhs) {
2529   Condition cond = liftoff::ToCondition(liftoff_cond);
2530   cmp(lhs, rhs);
2531   liftoff::setcc_32(this, cond, dst);
2532 }
2533 
2534 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2535   // Compute the OR of both registers in the src pair, using dst as scratch
2536   // register. Then check whether the result is equal to zero.
2537   if (src.low_gp() == dst) {
2538     or_(dst, src.high_gp());
2539   } else {
2540     if (src.high_gp() != dst) mov(dst, src.high_gp());
2541     or_(dst, src.low_gp());
2542   }
2543   liftoff::setcc_32(this, equal, dst);
2544 }
2545 
2546 namespace liftoff {
2547 inline LiftoffCondition cond_make_unsigned(LiftoffCondition cond) {
2548   switch (cond) {
2549     case kSignedLessThan:
2550       return kUnsignedLessThan;
2551     case kSignedLessEqual:
2552       return kUnsignedLessEqual;
2553     case kSignedGreaterThan:
2554       return kUnsignedGreaterThan;
2555     case kSignedGreaterEqual:
2556       return kUnsignedGreaterEqual;
2557     default:
2558       return cond;
2559   }
2560 }
2561 }  // namespace liftoff
2562 
2563 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2564                                          Register dst, LiftoffRegister lhs,
2565                                          LiftoffRegister rhs) {
2566   Condition cond = liftoff::ToCondition(liftoff_cond);
2567   Condition unsigned_cond =
2568       liftoff::ToCondition(liftoff::cond_make_unsigned(liftoff_cond));
2569 
2570   // Get the tmp byte register out here, such that we don't conditionally spill
2571   // (this cannot be reflected in the cache state).
2572   Register tmp_byte_reg = liftoff::GetTmpByteRegister(this, dst);
2573 
2574   // For signed i64 comparisons, we still need to use unsigned comparison for
2575   // the low word (the only bit carrying signedness information is the MSB in
2576   // the high word).
2577   Label setcc;
2578   Label cont;
2579   // Compare high word first. If it differs, use it for the setcc. If it's
2580   // equal, compare the low word and use that for setcc.
2581   cmp(lhs.high_gp(), rhs.high_gp());
2582   j(not_equal, &setcc, Label::kNear);
2583   cmp(lhs.low_gp(), rhs.low_gp());
2584   if (unsigned_cond != cond) {
2585     // If the condition predicate for the low differs from that for the high
2586     // word, emit a separate setcc sequence for the low word.
2587     liftoff::setcc_32_no_spill(this, unsigned_cond, dst, tmp_byte_reg);
2588     jmp(&cont);
2589   }
2590   bind(&setcc);
2591   liftoff::setcc_32_no_spill(this, cond, dst, tmp_byte_reg);
2592   bind(&cont);
2593 }
2594 
2595 namespace liftoff {
2596 template <void (Assembler::*cmp_op)(DoubleRegister, DoubleRegister)>
2597 void EmitFloatSetCond(LiftoffAssembler* assm, Condition cond, Register dst,
2598                       DoubleRegister lhs, DoubleRegister rhs) {
2599   Label cont;
2600   Label not_nan;
2601 
2602   // Get the tmp byte register out here, such that we don't conditionally spill
2603   // (this cannot be reflected in the cache state).
2604   Register tmp_byte_reg = GetTmpByteRegister(assm, dst);
2605 
2606   (assm->*cmp_op)(lhs, rhs);
2607   // If PF is one, one of the operands was NaN. This needs special handling.
2608   assm->j(parity_odd, &not_nan, Label::kNear);
2609   // Return 1 for f32.ne / f64.ne, 0 for all other cases.
2610   if (cond == not_equal) {
2611     assm->mov(dst, Immediate(1));
2612   } else {
2613     assm->xor_(dst, dst);
2614   }
2615   assm->jmp(&cont, Label::kNear);
2616   assm->bind(&not_nan);
2617 
2618   setcc_32_no_spill(assm, cond, dst, tmp_byte_reg);
2619   assm->bind(&cont);
2620 }
2621 }  // namespace liftoff
2622 
2623 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2624                                          Register dst, DoubleRegister lhs,
2625                                          DoubleRegister rhs) {
2626   Condition cond = liftoff::ToCondition(liftoff_cond);
2627   liftoff::EmitFloatSetCond<&Assembler::ucomiss>(this, cond, dst, lhs, rhs);
2628 }
2629 
2630 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2631                                          Register dst, DoubleRegister lhs,
2632                                          DoubleRegister rhs) {
2633   Condition cond = liftoff::ToCondition(liftoff_cond);
2634   liftoff::EmitFloatSetCond<&Assembler::ucomisd>(this, cond, dst, lhs, rhs);
2635 }
2636 
2637 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2638                                    LiftoffRegister true_value,
2639                                    LiftoffRegister false_value,
2640                                    ValueKind kind) {
2641   return false;
2642 }
2643 
2644 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2645                                       SmiCheckMode mode) {
2646   test_b(obj, Immediate(kSmiTagMask));
2647   Condition condition = mode == kJumpOnSmi ? zero : not_zero;
2648   j(condition, target);
2649 }
2650 
2651 namespace liftoff {
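// Helper for commutative SIMD binops. With AVX the non-destructive
// three-operand form is used. Without AVX the SSE instruction overwrites its
// first operand, so if dst aliases rhs we exploit commutativity and compute
// dst op= lhs instead of shuffling registers around.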
2652 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2653           void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2654 void EmitSimdCommutativeBinOp(
2655     LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2656     LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2657   if (CpuFeatures::IsSupported(AVX)) {
2658     CpuFeatureScope scope(assm, AVX);
2659     (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2660     return;
2661   }
2662 
2663   base::Optional<CpuFeatureScope> sse_scope;
2664   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2665 
2666   if (dst.fp() == rhs.fp()) {
2667     (assm->*sse_op)(dst.fp(), lhs.fp());
2668   } else {
2669     if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2670     (assm->*sse_op)(dst.fp(), rhs.fp());
2671   }
2672 }
2673 
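// Same as above, but for non-commutative ops: if dst aliases rhs, the operands
// cannot simply be swapped, so rhs is first saved into the scratch register.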
2674 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2675           void (Assembler::*sse_op)(XMMRegister, XMMRegister)>
2676 void EmitSimdNonCommutativeBinOp(
2677     LiftoffAssembler* assm, LiftoffRegister dst, LiftoffRegister lhs,
2678     LiftoffRegister rhs, base::Optional<CpuFeature> feature = base::nullopt) {
2679   if (CpuFeatures::IsSupported(AVX)) {
2680     CpuFeatureScope scope(assm, AVX);
2681     (assm->*avx_op)(dst.fp(), lhs.fp(), rhs.fp());
2682     return;
2683   }
2684 
2685   base::Optional<CpuFeatureScope> sse_scope;
2686   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2687 
2688   if (dst.fp() == rhs.fp()) {
2689     assm->movaps(kScratchDoubleReg, rhs.fp());
2690     assm->movaps(dst.fp(), lhs.fp());
2691     (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2692   } else {
2693     if (dst.fp() != lhs.fp()) assm->movaps(dst.fp(), lhs.fp());
2694     (assm->*sse_op)(dst.fp(), rhs.fp());
2695   }
2696 }
2697 
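// Helper for variable SIMD shifts. Wasm defines the shift count modulo the
// lane width, so the count is masked with (1 << width) - 1 and moved into an
// XMM register, because the SSE/AVX shift-by-register instructions take the
// count from an XMM register.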
2698 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, XMMRegister),
2699           void (Assembler::*sse_op)(XMMRegister, XMMRegister), uint8_t width>
2700 void EmitSimdShiftOp(LiftoffAssembler* assm, LiftoffRegister dst,
2701                      LiftoffRegister operand, LiftoffRegister count) {
2702   static constexpr RegClass tmp_rc = reg_class_for(kI32);
2703   LiftoffRegister tmp = assm->GetUnusedRegister(tmp_rc, LiftoffRegList{count});
2704   constexpr int mask = (1 << width) - 1;
2705 
2706   assm->mov(tmp.gp(), count.gp());
2707   assm->and_(tmp.gp(), Immediate(mask));
2708   assm->Movd(kScratchDoubleReg, tmp.gp());
2709   if (CpuFeatures::IsSupported(AVX)) {
2710     CpuFeatureScope scope(assm, AVX);
2711     (assm->*avx_op)(dst.fp(), operand.fp(), kScratchDoubleReg);
2712   } else {
2713     if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2714     (assm->*sse_op)(dst.fp(), kScratchDoubleReg);
2715   }
2716 }
2717 
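// Immediate-shift variant: the count is masked with (1 << width) - 1 at code
// generation time and emitted as an 8-bit immediate.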
2718 template <void (Assembler::*avx_op)(XMMRegister, XMMRegister, byte),
2719           void (Assembler::*sse_op)(XMMRegister, byte), uint8_t width>
2720 void EmitSimdShiftOpImm(LiftoffAssembler* assm, LiftoffRegister dst,
2721                         LiftoffRegister operand, int32_t count) {
2722   constexpr int mask = (1 << width) - 1;
2723   byte shift = static_cast<byte>(count & mask);
2724   if (CpuFeatures::IsSupported(AVX)) {
2725     CpuFeatureScope scope(assm, AVX);
2726     (assm->*avx_op)(dst.fp(), operand.fp(), shift);
2727   } else {
2728     if (dst.fp() != operand.fp()) assm->movaps(dst.fp(), operand.fp());
2729     (assm->*sse_op)(dst.fp(), shift);
2730   }
2731 }
2732 
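// any_true: branch-free sequence. dst is preset to 1; PTEST sets ZF if src is
// all zeros, in which case the CMOV overwrites dst with 0.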
2733 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2734                         LiftoffRegister src) {
2735   Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2736   assm->xor_(tmp, tmp);
2737   assm->mov(dst.gp(), Immediate(1));
2738   assm->Ptest(src.fp(), src.fp());
2739   assm->cmov(zero, dst.gp(), tmp);
2740 }
2741 
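// all_true: compare src against zero lane-wise with `pcmp`. PTEST of the
// comparison result sets ZF only if no lane of src was zero; dst is preset to
// 0 and CMOVed to 1 in that case.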
2742 template <void (SharedTurboAssembler::*pcmp)(XMMRegister, XMMRegister)>
2743 inline void EmitAllTrue(LiftoffAssembler* assm, LiftoffRegister dst,
2744                         LiftoffRegister src,
2745                         base::Optional<CpuFeature> feature = base::nullopt) {
2746   base::Optional<CpuFeatureScope> sse_scope;
2747   if (feature.has_value()) sse_scope.emplace(assm, *feature);
2748 
2749   Register tmp = assm->GetUnusedRegister(kGpReg, LiftoffRegList{dst}).gp();
2750   XMMRegister tmp_simd = liftoff::kScratchDoubleReg;
2751   assm->mov(tmp, Immediate(1));
2752   assm->xor_(dst.gp(), dst.gp());
2753   assm->Pxor(tmp_simd, tmp_simd);
2754   (assm->*pcmp)(tmp_simd, src.fp());
2755   assm->Ptest(tmp_simd, tmp_simd);
2756   assm->cmov(zero, dst.gp(), tmp);
2757 }
2758 
2759 }  // namespace liftoff
2760 
2761 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2762                                      Register offset_reg, uintptr_t offset_imm,
2763                                      LoadType type,
2764                                      LoadTransformationKind transform,
2765                                      uint32_t* protected_load_pc) {
2766   DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2767   Operand src_op{src_addr, offset_reg, times_1,
2768                  static_cast<int32_t>(offset_imm)};
2769   *protected_load_pc = pc_offset();
2770 
2771   MachineType memtype = type.mem_type();
2772   if (transform == LoadTransformationKind::kExtend) {
2773     if (memtype == MachineType::Int8()) {
2774       Pmovsxbw(dst.fp(), src_op);
2775     } else if (memtype == MachineType::Uint8()) {
2776       Pmovzxbw(dst.fp(), src_op);
2777     } else if (memtype == MachineType::Int16()) {
2778       Pmovsxwd(dst.fp(), src_op);
2779     } else if (memtype == MachineType::Uint16()) {
2780       Pmovzxwd(dst.fp(), src_op);
2781     } else if (memtype == MachineType::Int32()) {
2782       Pmovsxdq(dst.fp(), src_op);
2783     } else if (memtype == MachineType::Uint32()) {
2784       Pmovzxdq(dst.fp(), src_op);
2785     }
2786   } else if (transform == LoadTransformationKind::kZeroExtend) {
2787     if (memtype == MachineType::Int32()) {
2788       Movss(dst.fp(), src_op);
2789     } else {
2790       DCHECK_EQ(MachineType::Int64(), memtype);
2791       Movsd(dst.fp(), src_op);
2792     }
2793   } else {
2794     DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2795     if (memtype == MachineType::Int8()) {
2796       S128Load8Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2797     } else if (memtype == MachineType::Int16()) {
2798       S128Load16Splat(dst.fp(), src_op, liftoff::kScratchDoubleReg);
2799     } else if (memtype == MachineType::Int32()) {
2800       S128Load32Splat(dst.fp(), src_op);
2801     } else if (memtype == MachineType::Int64()) {
2802       Movddup(dst.fp(), src_op);
2803     }
2804   }
2805 }
2806 
2807 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2808                                 Register addr, Register offset_reg,
2809                                 uintptr_t offset_imm, LoadType type,
2810                                 uint8_t laneidx, uint32_t* protected_load_pc) {
2811   DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2812   Operand src_op{addr, offset_reg, times_1, static_cast<int32_t>(offset_imm)};
2813   *protected_load_pc = pc_offset();
2814 
2815   MachineType mem_type = type.mem_type();
2816   if (mem_type == MachineType::Int8()) {
2817     Pinsrb(dst.fp(), src.fp(), src_op, laneidx);
2818   } else if (mem_type == MachineType::Int16()) {
2819     Pinsrw(dst.fp(), src.fp(), src_op, laneidx);
2820   } else if (mem_type == MachineType::Int32()) {
2821     Pinsrd(dst.fp(), src.fp(), src_op, laneidx);
2822   } else {
2823     DCHECK_EQ(MachineType::Int64(), mem_type);
2824     if (laneidx == 0) {
2825       Movlps(dst.fp(), src.fp(), src_op);
2826     } else {
2827       DCHECK_EQ(1, laneidx);
2828       Movhps(dst.fp(), src.fp(), src_op);
2829     }
2830   }
2831 }
2832 
2833 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2834                                  uintptr_t offset_imm, LiftoffRegister src,
2835                                  StoreType type, uint8_t lane,
2836                                  uint32_t* protected_store_pc) {
2837   DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
2838   Operand dst_op = Operand(dst, offset, times_1, offset_imm);
2839   if (protected_store_pc) *protected_store_pc = pc_offset();
2840 
2841   MachineRepresentation rep = type.mem_rep();
2842   if (rep == MachineRepresentation::kWord8) {
2843     Pextrb(dst_op, src.fp(), lane);
2844   } else if (rep == MachineRepresentation::kWord16) {
2845     Pextrw(dst_op, src.fp(), lane);
2846   } else if (rep == MachineRepresentation::kWord32) {
2847     S128Store32Lane(dst_op, src.fp(), lane);
2848   } else {
2849     DCHECK_EQ(MachineRepresentation::kWord64, rep);
2850     S128Store64Lane(dst_op, src.fp(), lane);
2851   }
2852 }
2853 
2854 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
2855                                           LiftoffRegister lhs,
2856                                           LiftoffRegister rhs,
2857                                           const uint8_t shuffle[16],
2858                                           bool is_swizzle) {
2859   LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
2860   // Prepare a 16-byte-aligned stack buffer for the shuffle control mask.
2861   mov(tmp.gp(), esp);
2862   and_(esp, -16);
2863 
2864   if (is_swizzle) {
2865     uint32_t imms[4];
2866     // Shuffles that use just one operand are called swizzles; rhs can be ignored.
2867     wasm::SimdShuffle::Pack16Lanes(imms, shuffle);
2868     for (int i = 3; i >= 0; i--) {
2869       push_imm32(imms[i]);
2870     }
2871     Pshufb(dst.fp(), lhs.fp(), Operand(esp, 0));
2872     mov(esp, tmp.gp());
2873     return;
2874   }
2875 
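  // General two-operand shuffle: build a mask that selects the lhs lanes
  // (lanes referring to rhs are zeroed via the 0x80 bit), shuffle lhs into the
  // scratch register, then build the complementary mask for rhs, shuffle rhs
  // into dst, and OR the two halves together.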
2876   movups(liftoff::kScratchDoubleReg, lhs.fp());
2877   for (int i = 3; i >= 0; i--) {
2878     uint32_t mask = 0;
2879     for (int j = 3; j >= 0; j--) {
2880       uint8_t lane = shuffle[i * 4 + j];
2881       mask <<= 8;
2882       mask |= lane < kSimd128Size ? lane : 0x80;
2883     }
2884     push(Immediate(mask));
2885   }
2886   Pshufb(liftoff::kScratchDoubleReg, lhs.fp(), Operand(esp, 0));
2887 
2888   for (int i = 3; i >= 0; i--) {
2889     uint32_t mask = 0;
2890     for (int j = 3; j >= 0; j--) {
2891       uint8_t lane = shuffle[i * 4 + j];
2892       mask <<= 8;
2893       mask |= lane >= kSimd128Size ? (lane & 0x0F) : 0x80;
2894     }
2895     push(Immediate(mask));
2896   }
2897   Pshufb(dst.fp(), rhs.fp(), Operand(esp, 0));
2898   Por(dst.fp(), liftoff::kScratchDoubleReg);
2899   mov(esp, tmp.gp());
2900 }
2901 
2902 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2903                                           LiftoffRegister lhs,
2904                                           LiftoffRegister rhs) {
2905   Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
2906   I8x16Swizzle(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg,
2907                scratch);
2908 }
2909 
2910 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
2911                                          LiftoffRegister src) {
2912   Register scratch = GetUnusedRegister(RegClass::kGpReg, {}).gp();
2913   XMMRegister tmp =
2914       GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, src}).fp();
2915   I8x16Popcnt(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp, scratch);
2916 }
2917 
2918 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
2919                                         LiftoffRegister src) {
2920   I8x16Splat(dst.fp(), src.gp(), liftoff::kScratchDoubleReg);
2921 }
2922 
2923 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
2924                                         LiftoffRegister src) {
2925   I16x8Splat(dst.fp(), src.gp());
2926 }
2927 
2928 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
2929                                         LiftoffRegister src) {
2930   Movd(dst.fp(), src.gp());
2931   Pshufd(dst.fp(), dst.fp(), uint8_t{0});
2932 }
2933 
2934 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2935                                         LiftoffRegister src) {
2936   Pinsrd(dst.fp(), src.low_gp(), 0);
2937   Pinsrd(dst.fp(), src.high_gp(), 1);
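  // 0x44 == 0b01'00'01'00: copy dwords {0, 1} into dwords {2, 3}, duplicating
  // the 64-bit value into both lanes.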
2938   Pshufd(dst.fp(), dst.fp(), uint8_t{0x44});
2939 }
2940 
2941 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2942                                         LiftoffRegister src) {
2943   F32x4Splat(dst.fp(), src.fp());
2944 }
2945 
2946 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2947                                         LiftoffRegister src) {
2948   Movddup(dst.fp(), src.fp());
2949 }
2950 
2951 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
2952                                      LiftoffRegister rhs) {
2953   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2954       this, dst, lhs, rhs);
2955 }
2956 
2957 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
2958                                      LiftoffRegister rhs) {
2959   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqb, &Assembler::pcmpeqb>(
2960       this, dst, lhs, rhs);
2961   Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
2962   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
2963 }
2964 
2965 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
2966                                        LiftoffRegister rhs) {
2967   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtb,
2968                                        &Assembler::pcmpgtb>(this, dst, lhs,
2969                                                             rhs);
2970 }
2971 
2972 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
2973                                        LiftoffRegister rhs) {
2974   DoubleRegister ref = rhs.fp();
2975   if (dst == rhs) {
2976     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
2977     ref = liftoff::kScratchDoubleReg;
2978   }
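  // lhs >_u rhs is computed as !(max_u(lhs, rhs) == rhs): take the unsigned
  // max, compare it against rhs, and invert the comparison result.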
2979   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
2980       this, dst, lhs, rhs);
2981   Pcmpeqb(dst.fp(), ref);
2982   Pcmpeqb(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
2983   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
2984 }
2985 
2986 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
2987                                        LiftoffRegister rhs) {
2988   DoubleRegister ref = rhs.fp();
2989   if (dst == rhs) {
2990     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
2991     ref = liftoff::kScratchDoubleReg;
2992   }
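  // lhs >=_s rhs is computed as min_s(lhs, rhs) == rhs; the ge_s/ge_u variants
  // for the other lane sizes below use the same pattern.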
2993   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
2994       this, dst, lhs, rhs, SSE4_1);
2995   Pcmpeqb(dst.fp(), ref);
2996 }
2997 
2998 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
2999                                        LiftoffRegister rhs) {
3000   DoubleRegister ref = rhs.fp();
3001   if (dst == rhs) {
3002     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3003     ref = liftoff::kScratchDoubleReg;
3004   }
3005   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3006       this, dst, lhs, rhs);
3007   Pcmpeqb(dst.fp(), ref);
3008 }
3009 
3010 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
3011                                      LiftoffRegister rhs) {
3012   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3013       this, dst, lhs, rhs);
3014 }
3015 
3016 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
3017                                      LiftoffRegister rhs) {
3018   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqw, &Assembler::pcmpeqw>(
3019       this, dst, lhs, rhs);
3020   Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3021   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3022 }
3023 
3024 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3025                                        LiftoffRegister rhs) {
3026   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtw,
3027                                        &Assembler::pcmpgtw>(this, dst, lhs,
3028                                                             rhs);
3029 }
3030 
3031 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3032                                        LiftoffRegister rhs) {
3033   DoubleRegister ref = rhs.fp();
3034   if (dst == rhs) {
3035     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3036     ref = liftoff::kScratchDoubleReg;
3037   }
3038   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3039       this, dst, lhs, rhs, SSE4_1);
3040   Pcmpeqw(dst.fp(), ref);
3041   Pcmpeqw(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3042   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3043 }
3044 
3045 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3046                                        LiftoffRegister rhs) {
3047   DoubleRegister ref = rhs.fp();
3048   if (dst == rhs) {
3049     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3050     ref = liftoff::kScratchDoubleReg;
3051   }
3052   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3053       this, dst, lhs, rhs);
3054   Pcmpeqw(dst.fp(), ref);
3055 }
3056 
3057 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3058                                        LiftoffRegister rhs) {
3059   DoubleRegister ref = rhs.fp();
3060   if (dst == rhs) {
3061     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3062     ref = liftoff::kScratchDoubleReg;
3063   }
3064   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3065       this, dst, lhs, rhs, SSE4_1);
3066   Pcmpeqw(dst.fp(), ref);
3067 }
3068 
3069 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3070                                      LiftoffRegister rhs) {
3071   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3072       this, dst, lhs, rhs);
3073 }
3074 
3075 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3076                                      LiftoffRegister rhs) {
3077   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqd, &Assembler::pcmpeqd>(
3078       this, dst, lhs, rhs);
3079   Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3080   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3081 }
3082 
3083 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3084                                        LiftoffRegister rhs) {
3085   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpcmpgtd,
3086                                        &Assembler::pcmpgtd>(this, dst, lhs,
3087                                                             rhs);
3088 }
3089 
3090 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3091                                        LiftoffRegister rhs) {
3092   DoubleRegister ref = rhs.fp();
3093   if (dst == rhs) {
3094     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3095     ref = liftoff::kScratchDoubleReg;
3096   }
3097   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3098       this, dst, lhs, rhs, SSE4_1);
3099   Pcmpeqd(dst.fp(), ref);
3100   Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3101   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3102 }
3103 
3104 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3105                                        LiftoffRegister rhs) {
3106   DoubleRegister ref = rhs.fp();
3107   if (dst == rhs) {
3108     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3109     ref = liftoff::kScratchDoubleReg;
3110   }
3111   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3112       this, dst, lhs, rhs, SSE4_1);
3113   Pcmpeqd(dst.fp(), ref);
3114 }
3115 
3116 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3117                                        LiftoffRegister rhs) {
3118   DoubleRegister ref = rhs.fp();
3119   if (dst == rhs) {
3120     Movaps(liftoff::kScratchDoubleReg, rhs.fp());
3121     ref = liftoff::kScratchDoubleReg;
3122   }
3123   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3124       this, dst, lhs, rhs, SSE4_1);
3125   Pcmpeqd(dst.fp(), ref);
3126 }
3127 
3128 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3129                                      LiftoffRegister rhs) {
3130   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3131       this, dst, lhs, rhs, SSE4_1);
3132 }
3133 
3134 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3135                                      LiftoffRegister rhs) {
3136   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpcmpeqq, &Assembler::pcmpeqq>(
3137       this, dst, lhs, rhs, SSE4_1);
3138   Pcmpeqq(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3139   Pxor(dst.fp(), liftoff::kScratchDoubleReg);
3140 }
3141 
3142 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3143                                        LiftoffRegister rhs) {
3144   // Different register alias requirements depending on CpuFeatures supported:
3145   if (CpuFeatures::IsSupported(AVX) || CpuFeatures::IsSupported(SSE4_2)) {
3146     // 1. AVX or SSE4_2: no requirements (I64x2GtS takes care of aliasing).
3147     I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3148   } else {
3149     // 2. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3150     if (dst == lhs || dst == rhs) {
3151       LiftoffRegister tmp =
3152           GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3153       I64x2GtS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3154       movaps(dst.fp(), tmp.fp());
3155     } else {
3156       I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3157     }
3158   }
3159 }
3160 
3161 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3162                                        LiftoffRegister rhs) {
3163   // Different register alias requirements depending on CpuFeatures supported:
3164   if (CpuFeatures::IsSupported(AVX)) {
3165     // 1. AVX, no requirements.
3166     I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3167   } else if (CpuFeatures::IsSupported(SSE4_2)) {
3168     // 2. SSE4_2, dst != lhs.
3169     if (dst == lhs) {
3170       LiftoffRegister tmp =
3171           GetUnusedRegister(RegClass::kFpReg, {rhs}, LiftoffRegList{lhs});
3172       // macro-assembler uses kScratchDoubleReg, so don't use it.
3173       I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3174       movaps(dst.fp(), tmp.fp());
3175     } else {
3176       I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3177     }
3178   } else {
3179     // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
3180     if (dst == lhs || dst == rhs) {
3181       LiftoffRegister tmp =
3182           GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{lhs, rhs});
3183       I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3184       movaps(dst.fp(), tmp.fp());
3185     } else {
3186       I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
3187     }
3188   }
3189 }
3190 
3191 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3192                                      LiftoffRegister rhs) {
3193   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
3194       this, dst, lhs, rhs);
3195 }
3196 
3197 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3198                                      LiftoffRegister rhs) {
3199   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqps,
3200                                     &Assembler::cmpneqps>(this, dst, lhs, rhs);
3201 }
3202 
3203 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
3204                                      LiftoffRegister rhs) {
3205   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltps,
3206                                        &Assembler::cmpltps>(this, dst, lhs,
3207                                                             rhs);
3208 }
3209 
3210 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
3211                                      LiftoffRegister rhs) {
3212   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpleps,
3213                                        &Assembler::cmpleps>(this, dst, lhs,
3214                                                             rhs);
3215 }
3216 
3217 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3218                                      LiftoffRegister rhs) {
3219   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqpd, &Assembler::cmpeqpd>(
3220       this, dst, lhs, rhs);
3221 }
3222 
3223 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3224                                      LiftoffRegister rhs) {
3225   liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpneqpd,
3226                                     &Assembler::cmpneqpd>(this, dst, lhs, rhs);
3227 }
3228 
3229 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
3230                                      LiftoffRegister rhs) {
3231   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmpltpd,
3232                                        &Assembler::cmpltpd>(this, dst, lhs,
3233                                                             rhs);
3234 }
3235 
3236 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
3237                                      LiftoffRegister rhs) {
3238   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vcmplepd,
3239                                        &Assembler::cmplepd>(this, dst, lhs,
3240                                                             rhs);
3241 }
3242 
3243 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
3244                                        const uint8_t imms[16]) {
3245   uint64_t vals[2];
3246   memcpy(vals, imms, sizeof(vals));
3247   TurboAssembler::Move(dst.fp(), vals[0]);
3248 
3249   uint64_t high = vals[1];
3250   Register tmp = GetUnusedRegister(RegClass::kGpReg, {}).gp();
3251   TurboAssembler::Move(tmp, Immediate(high & 0xffff'ffff));
3252   Pinsrd(dst.fp(), tmp, 2);
3253 
3254   TurboAssembler::Move(tmp, Immediate(high >> 32));
3255   Pinsrd(dst.fp(), tmp, 3);
3256 }
3257 
3258 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
3259   S128Not(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3260 }
3261 
3262 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
3263                                      LiftoffRegister rhs) {
3264   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpand, &Assembler::pand>(
3265       this, dst, lhs, rhs);
3266 }
3267 
3268 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
3269                                     LiftoffRegister rhs) {
3270   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpor, &Assembler::por>(
3271       this, dst, lhs, rhs);
3272 }
3273 
3274 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
3275                                      LiftoffRegister rhs) {
3276   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpxor, &Assembler::pxor>(
3277       this, dst, lhs, rhs);
3278 }
3279 
3280 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
3281                                         LiftoffRegister src1,
3282                                         LiftoffRegister src2,
3283                                         LiftoffRegister mask) {
3284   // Ensure that we don't overwrite any inputs with the movaps below.
3285   DCHECK_NE(dst, src1);
3286   DCHECK_NE(dst, src2);
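  // Without AVX, S128Select operates on the mask register in place, so the
  // mask is first copied into dst (unless the two already alias).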
3287   if (!CpuFeatures::IsSupported(AVX) && dst != mask) {
3288     movaps(dst.fp(), mask.fp());
3289     S128Select(dst.fp(), dst.fp(), src1.fp(), src2.fp(),
3290                liftoff::kScratchDoubleReg);
3291   } else {
3292     S128Select(dst.fp(), mask.fp(), src1.fp(), src2.fp(),
3293                liftoff::kScratchDoubleReg);
3294   }
3295 }
3296 
3297 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
3298                                       LiftoffRegister src) {
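  // If dst aliases src, negate in place via psignb with an all-ones mask
  // (which flips the sign of every lane); otherwise compute 0 - src. The
  // i16x8/i32x4 neg ops below use the same pattern.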
3299   if (dst.fp() == src.fp()) {
3300     Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3301     Psignb(dst.fp(), liftoff::kScratchDoubleReg);
3302   } else {
3303     Pxor(dst.fp(), dst.fp());
3304     Psubb(dst.fp(), src.fp());
3305   }
3306 }
3307 
3308 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
3309                                          LiftoffRegister src) {
3310   liftoff::EmitAnyTrue(this, dst, src);
3311 }
3312 
3313 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
3314                                           LiftoffRegister src) {
3315   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqb>(this, dst, src);
3316 }
3317 
3318 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
3319                                           LiftoffRegister src) {
3320   Pmovmskb(dst.gp(), src.fp());
3321 }
3322 
3323 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
3324                                       LiftoffRegister rhs) {
3325   LiftoffRegister tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs});
3326   LiftoffRegister tmp_simd =
3327       GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs});
3328   I8x16Shl(dst.fp(), lhs.fp(), rhs.gp(), tmp.gp(), liftoff::kScratchDoubleReg,
3329            tmp_simd.fp());
3330 }
3331 
3332 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
3333                                        int32_t rhs) {
3334   LiftoffRegister tmp = GetUnusedRegister(kGpReg, {});
3335   I8x16Shl(dst.fp(), lhs.fp(), rhs, tmp.gp(), liftoff::kScratchDoubleReg);
3336 }
3337 
3338 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
3339                                         LiftoffRegister lhs,
3340                                         LiftoffRegister rhs) {
3341   Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3342   XMMRegister tmp_simd =
3343       GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3344   I8x16ShrS(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3345             tmp_simd);
3346 }
3347 
3348 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
3349                                          LiftoffRegister lhs, int32_t rhs) {
3350   I8x16ShrS(dst.fp(), lhs.fp(), rhs, liftoff::kScratchDoubleReg);
3351 }
3352 
3353 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
3354                                         LiftoffRegister lhs,
3355                                         LiftoffRegister rhs) {
3356   Register tmp = GetUnusedRegister(kGpReg, LiftoffRegList{rhs}).gp();
3357   XMMRegister tmp_simd =
3358       GetUnusedRegister(kFpReg, LiftoffRegList{dst, lhs}).fp();
3359   I8x16ShrU(dst.fp(), lhs.fp(), rhs.gp(), tmp, liftoff::kScratchDoubleReg,
3360             tmp_simd);
3361 }
3362 
3363 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
3364                                          LiftoffRegister lhs, int32_t rhs) {
3365   Register tmp = GetUnusedRegister(kGpReg, {}).gp();
3366   I8x16ShrU(dst.fp(), lhs.fp(), rhs, tmp, liftoff::kScratchDoubleReg);
3367 }
3368 
3369 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
3370                                       LiftoffRegister rhs) {
3371   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddb, &Assembler::paddb>(
3372       this, dst, lhs, rhs);
3373 }
3374 
3375 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
3376                                             LiftoffRegister lhs,
3377                                             LiftoffRegister rhs) {
3378   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsb, &Assembler::paddsb>(
3379       this, dst, lhs, rhs);
3380 }
3381 
3382 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
3383                                             LiftoffRegister lhs,
3384                                             LiftoffRegister rhs) {
3385   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusb, &Assembler::paddusb>(
3386       this, dst, lhs, rhs);
3387 }
3388 
3389 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
3390                                       LiftoffRegister rhs) {
3391   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubb, &Assembler::psubb>(
3392       this, dst, lhs, rhs);
3393 }
3394 
3395 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
3396                                             LiftoffRegister lhs,
3397                                             LiftoffRegister rhs) {
3398   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsb, &Assembler::psubsb>(
3399       this, dst, lhs, rhs);
3400 }
3401 
3402 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
3403                                             LiftoffRegister lhs,
3404                                             LiftoffRegister rhs) {
3405   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusb,
3406                                        &Assembler::psubusb>(this, dst, lhs,
3407                                                             rhs);
3408 }
3409 
3410 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
3411                                         LiftoffRegister lhs,
3412                                         LiftoffRegister rhs) {
3413   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsb, &Assembler::pminsb>(
3414       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3415 }
3416 
3417 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
3418                                         LiftoffRegister lhs,
3419                                         LiftoffRegister rhs) {
3420   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminub, &Assembler::pminub>(
3421       this, dst, lhs, rhs);
3422 }
3423 
3424 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3425                                         LiftoffRegister lhs,
3426                                         LiftoffRegister rhs) {
3427   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsb, &Assembler::pmaxsb>(
3428       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3429 }
3430 
3431 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3432                                         LiftoffRegister lhs,
3433                                         LiftoffRegister rhs) {
3434   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxub, &Assembler::pmaxub>(
3435       this, dst, lhs, rhs);
3436 }
3437 
3438 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3439                                       LiftoffRegister src) {
3440   if (dst.fp() == src.fp()) {
3441     Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3442     Psignw(dst.fp(), liftoff::kScratchDoubleReg);
3443   } else {
3444     Pxor(dst.fp(), dst.fp());
3445     Psubw(dst.fp(), src.fp());
3446   }
3447 }
3448 
3449 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3450                                           LiftoffRegister src) {
3451   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqw>(this, dst, src);
3452 }
3453 
3454 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3455                                           LiftoffRegister src) {
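  // Pack the eight 16-bit lanes to signed-saturated bytes into the high half
  // of the scratch register (the low half keeps whatever the scratch register
  // held), collect the byte sign bits with Pmovmskb, then shift out the low
  // 8 bits that belong to the stale half.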
3456   XMMRegister tmp = liftoff::kScratchDoubleReg;
3457   Packsswb(tmp, src.fp());
3458   Pmovmskb(dst.gp(), tmp);
3459   shr(dst.gp(), 8);
3460 }
3461 
3462 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3463                                       LiftoffRegister rhs) {
3464   liftoff::EmitSimdShiftOp<&Assembler::vpsllw, &Assembler::psllw, 4>(this, dst,
3465                                                                      lhs, rhs);
3466 }
3467 
3468 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3469                                        int32_t rhs) {
3470   liftoff::EmitSimdShiftOpImm<&Assembler::vpsllw, &Assembler::psllw, 4>(
3471       this, dst, lhs, rhs);
3472 }
3473 
3474 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3475                                         LiftoffRegister lhs,
3476                                         LiftoffRegister rhs) {
3477   liftoff::EmitSimdShiftOp<&Assembler::vpsraw, &Assembler::psraw, 4>(this, dst,
3478                                                                      lhs, rhs);
3479 }
3480 
3481 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3482                                          LiftoffRegister lhs, int32_t rhs) {
3483   liftoff::EmitSimdShiftOpImm<&Assembler::vpsraw, &Assembler::psraw, 4>(
3484       this, dst, lhs, rhs);
3485 }
3486 
3487 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3488                                         LiftoffRegister lhs,
3489                                         LiftoffRegister rhs) {
3490   liftoff::EmitSimdShiftOp<&Assembler::vpsrlw, &Assembler::psrlw, 4>(this, dst,
3491                                                                      lhs, rhs);
3492 }
3493 
3494 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3495                                          LiftoffRegister lhs, int32_t rhs) {
3496   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlw, &Assembler::psrlw, 4>(
3497       this, dst, lhs, rhs);
3498 }
3499 
3500 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3501                                       LiftoffRegister rhs) {
3502   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddw, &Assembler::paddw>(
3503       this, dst, lhs, rhs);
3504 }
3505 
3506 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3507                                             LiftoffRegister lhs,
3508                                             LiftoffRegister rhs) {
3509   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddsw, &Assembler::paddsw>(
3510       this, dst, lhs, rhs);
3511 }
3512 
3513 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3514                                             LiftoffRegister lhs,
3515                                             LiftoffRegister rhs) {
3516   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddusw, &Assembler::paddusw>(
3517       this, dst, lhs, rhs);
3518 }
3519 
3520 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3521                                       LiftoffRegister rhs) {
3522   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubw, &Assembler::psubw>(
3523       this, dst, lhs, rhs);
3524 }
3525 
3526 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3527                                             LiftoffRegister lhs,
3528                                             LiftoffRegister rhs) {
3529   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubsw, &Assembler::psubsw>(
3530       this, dst, lhs, rhs);
3531 }
3532 
3533 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3534                                             LiftoffRegister lhs,
3535                                             LiftoffRegister rhs) {
3536   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubusw,
3537                                        &Assembler::psubusw>(this, dst, lhs,
3538                                                             rhs);
3539 }
3540 
3541 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3542                                       LiftoffRegister rhs) {
3543   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmullw, &Assembler::pmullw>(
3544       this, dst, lhs, rhs);
3545 }
3546 
3547 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3548                                         LiftoffRegister lhs,
3549                                         LiftoffRegister rhs) {
3550   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsw, &Assembler::pminsw>(
3551       this, dst, lhs, rhs);
3552 }
3553 
3554 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3555                                         LiftoffRegister lhs,
3556                                         LiftoffRegister rhs) {
3557   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminuw, &Assembler::pminuw>(
3558       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3559 }
3560 
3561 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3562                                         LiftoffRegister lhs,
3563                                         LiftoffRegister rhs) {
3564   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsw, &Assembler::pmaxsw>(
3565       this, dst, lhs, rhs);
3566 }
3567 
3568 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3569                                         LiftoffRegister lhs,
3570                                         LiftoffRegister rhs) {
3571   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxuw, &Assembler::pmaxuw>(
3572       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3573 }
3574 
3575 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3576                                                           LiftoffRegister src) {
3577   I16x8ExtAddPairwiseI8x16S(dst.fp(), src.fp(), liftoff::kScratchDoubleReg,
3578                             GetUnusedRegister(kGpReg, {}).gp());
3579 }
3580 
3581 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3582                                                           LiftoffRegister src) {
3583   I16x8ExtAddPairwiseI8x16U(dst.fp(), src.fp(),
3584                             GetUnusedRegister(kGpReg, {}).gp());
3585 }
3586 
3587 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3588                                                      LiftoffRegister src1,
3589                                                      LiftoffRegister src2) {
3590   I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3591                  /*is_signed=*/true);
3592 }
3593 
3594 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3595                                                      LiftoffRegister src1,
3596                                                      LiftoffRegister src2) {
3597   I16x8ExtMulLow(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
3598                  /*is_signed=*/false);
3599 }
3600 
3601 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3602                                                       LiftoffRegister src1,
3603                                                       LiftoffRegister src2) {
3604   I16x8ExtMulHighS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3605 }
3606 
3607 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3608                                                       LiftoffRegister src1,
3609                                                       LiftoffRegister src2) {
3610   I16x8ExtMulHighU(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3611 }
3612 
3613 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3614                                                 LiftoffRegister src1,
3615                                                 LiftoffRegister src2) {
3616   I16x8Q15MulRSatS(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg);
3617 }
3618 
3619 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3620                                       LiftoffRegister src) {
3621   if (dst.fp() == src.fp()) {
3622     Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
3623     Psignd(dst.fp(), liftoff::kScratchDoubleReg);
3624   } else {
3625     Pxor(dst.fp(), dst.fp());
3626     Psubd(dst.fp(), src.fp());
3627   }
3628 }
3629 
3630 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3631                                           LiftoffRegister src) {
3632   liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqd>(this, dst, src);
3633 }
3634 
3635 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3636                                           LiftoffRegister src) {
3637   Movmskps(dst.gp(), src.fp());
3638 }
3639 
3640 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3641                                       LiftoffRegister rhs) {
3642   liftoff::EmitSimdShiftOp<&Assembler::vpslld, &Assembler::pslld, 5>(this, dst,
3643                                                                      lhs, rhs);
3644 }
3645 
3646 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3647                                        int32_t rhs) {
3648   liftoff::EmitSimdShiftOpImm<&Assembler::vpslld, &Assembler::pslld, 5>(
3649       this, dst, lhs, rhs);
3650 }
3651 
3652 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3653                                         LiftoffRegister lhs,
3654                                         LiftoffRegister rhs) {
3655   liftoff::EmitSimdShiftOp<&Assembler::vpsrad, &Assembler::psrad, 5>(this, dst,
3656                                                                      lhs, rhs);
3657 }
3658 
3659 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3660                                          LiftoffRegister lhs, int32_t rhs) {
3661   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrad, &Assembler::psrad, 5>(
3662       this, dst, lhs, rhs);
3663 }
3664 
3665 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3666                                         LiftoffRegister lhs,
3667                                         LiftoffRegister rhs) {
3668   liftoff::EmitSimdShiftOp<&Assembler::vpsrld, &Assembler::psrld, 5>(this, dst,
3669                                                                      lhs, rhs);
3670 }
3671 
3672 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3673                                          LiftoffRegister lhs, int32_t rhs) {
3674   liftoff::EmitSimdShiftOpImm<&Assembler::vpsrld, &Assembler::psrld, 5>(
3675       this, dst, lhs, rhs);
3676 }
3677 
emit_i32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3678 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3679                                       LiftoffRegister rhs) {
3680   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddd, &Assembler::paddd>(
3681       this, dst, lhs, rhs);
3682 }
3683 
emit_i32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3684 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3685                                       LiftoffRegister rhs) {
3686   liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubd, &Assembler::psubd>(
3687       this, dst, lhs, rhs);
3688 }
3689 
emit_i32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3690 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3691                                       LiftoffRegister rhs) {
3692   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmulld, &Assembler::pmulld>(
3693       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3694 }
3695 
emit_i32x4_min_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3696 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3697                                         LiftoffRegister lhs,
3698                                         LiftoffRegister rhs) {
3699   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminsd, &Assembler::pminsd>(
3700       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3701 }
3702 
emit_i32x4_min_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3703 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3704                                         LiftoffRegister lhs,
3705                                         LiftoffRegister rhs) {
3706   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpminud, &Assembler::pminud>(
3707       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3708 }
3709 
emit_i32x4_max_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3710 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3711                                         LiftoffRegister lhs,
3712                                         LiftoffRegister rhs) {
3713   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxsd, &Assembler::pmaxsd>(
3714       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3715 }
3716 
emit_i32x4_max_u(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3717 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3718                                         LiftoffRegister lhs,
3719                                         LiftoffRegister rhs) {
3720   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaxud, &Assembler::pmaxud>(
3721       this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
3722 }
3723 
emit_i32x4_dot_i16x8_s(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3724 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3725                                               LiftoffRegister lhs,
3726                                               LiftoffRegister rhs) {
3727   liftoff::EmitSimdCommutativeBinOp<&Assembler::vpmaddwd, &Assembler::pmaddwd>(
3728       this, dst, lhs, rhs);
3729 }
3730 
emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,LiftoffRegister src)3731 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3732                                                           LiftoffRegister src) {
3733   I32x4ExtAddPairwiseI16x8S(dst.fp(), src.fp(),
3734                             GetUnusedRegister(kGpReg, {}).gp());
3735 }
3736 
emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,LiftoffRegister src)3737 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3738                                                           LiftoffRegister src) {
3739   I32x4ExtAddPairwiseI16x8U(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
3740 }
3741 
namespace liftoff {
// Helper function that checks for register aliasing and AVX support, and
// moves registers around before calling the actual macro-assembler function.
inline void I32x4ExtMulHelper(LiftoffAssembler* assm, XMMRegister dst,
                              XMMRegister src1, XMMRegister src2, bool low,
                              bool is_signed) {
  // I32x4ExtMul requires dst == src1 if AVX is not supported.
  if (CpuFeatures::IsSupported(AVX) || dst == src1) {
    assm->I32x4ExtMul(dst, src1, src2, liftoff::kScratchDoubleReg, low,
                      is_signed);
  } else if (dst != src2) {
    // dst != src1 && dst != src2
    assm->movaps(dst, src1);
    assm->I32x4ExtMul(dst, dst, src2, liftoff::kScratchDoubleReg, low,
                      is_signed);
  } else {
    // dst == src2
    // Extended multiplication is commutative, so the operands can be swapped.
    assm->movaps(dst, src2);
    assm->I32x4ExtMul(dst, dst, src1, liftoff::kScratchDoubleReg, low,
                      is_signed);
  }
}
}  // namespace liftoff

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(), /*low=*/true,
                             /*is_signed=*/false);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/true);
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  liftoff::I32x4ExtMulHelper(this, dst.fp(), src1.fp(), src2.fp(),
                             /*low=*/false,
                             /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Neg(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  liftoff::EmitAllTrue<&TurboAssembler::Pcmpeqq>(this, dst, src, SSE4_1);
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsllq, &Assembler::psllq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsllq, &Assembler::psllq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  XMMRegister tmp =
      GetUnusedRegister(RegClass::kFpReg, LiftoffRegList{dst, lhs}).fp();
  Register scratch =
      GetUnusedRegister(RegClass::kGpReg, LiftoffRegList{rhs}).gp();

  I64x2ShrS(dst.fp(), lhs.fp(), rhs.gp(), liftoff::kScratchDoubleReg, tmp,
            scratch);
}

void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  I64x2ShrS(dst.fp(), lhs.fp(), rhs & 0x3F, liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShiftOp<&Assembler::vpsrlq, &Assembler::psrlq, 6>(this, dst,
                                                                     lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftOpImm<&Assembler::vpsrlq, &Assembler::psrlq, 6>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpaddq, &Assembler::paddq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpsubq, &Assembler::psubq>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  LiftoffRegister tmp1 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs});
  LiftoffRegister tmp2 =
      GetUnusedRegister(tmp_rc, LiftoffRegList{dst, lhs, rhs, tmp1});
  I64x2Mul(dst.fp(), lhs.fp(), rhs.fp(), tmp1.fp(), tmp2.fp());
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/true, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/true, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/false, /*is_signed=*/true);
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  I64x2ExtMul(dst.fp(), src1.fp(), src2.fp(), liftoff::kScratchDoubleReg,
              /*low=*/false, /*is_signed=*/false);
}

void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  Movmskpd(dst.gp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2SConvertI32x4High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxdq(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I64x2UConvertI32x4High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Absps(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negps(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtps(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundps(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddps, &Assembler::addps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubps, &Assembler::subps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulps, &Assembler::mulps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivps, &Assembler::divps>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F32x4Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minps works, pmin(a, b) = minps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminps, &Assembler::minps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxps works, pmax(a, b) = maxps(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxps, &Assembler::maxps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Abspd(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  Negpd(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  Sqrtpd(dst.fp(), src.fp());
}

bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
                                       LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundUp);
  return true;
}

bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundDown);
  return true;
}

bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
                                        LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToZero);
  return true;
}

bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
                                              LiftoffRegister src) {
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  Roundpd(dst.fp(), src.fp(), kRoundToNearest);
  return true;
}

void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vaddpd, &Assembler::addpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vsubpd, &Assembler::subpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vmulpd, &Assembler::mulpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vdivpd, &Assembler::divpd>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Min(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  F64x2Max(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way minpd works, pmin(a, b) = minpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vminpd, &Assembler::minpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Due to the way maxpd works, pmax(a, b) = maxpd(b, a).
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vmaxpd, &Assembler::maxpd>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Cvtdq2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  F64x2ConvertLowI32x4U(dst.fp(), src.fp(), tmp);
}

void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtps2pd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4SConvertF32x4(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

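// Overview of the sequence below: SSE has no unsigned packed float-to-int
// conversion; {cvttps2dq} only covers the signed range. NaN and negative
// lanes are therefore clamped to zero first, then the value and the excess
// above the signed maximum are converted separately and added back so that
// out-of-range lanes saturate (see the step-by-step comments inline).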
void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  static constexpr RegClass tmp_rc = reg_class_for(kS128);
  DoubleRegister tmp = GetUnusedRegister(tmp_rc, LiftoffRegList{dst, src}).fp();
  // NaN -> 0, negative -> 0.
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmaxps(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    maxps(dst.fp(), liftoff::kScratchDoubleReg);
  }
  // scratch: float representation of max_signed.
  Pcmpeqd(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Psrld(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg,
        uint8_t{1});  // 0x7fffffff
  Cvtdq2ps(liftoff::kScratchDoubleReg,
           liftoff::kScratchDoubleReg);  // 0x4f000000
  // tmp: convert (src-max_signed).
  // Set positive overflow lanes to 0x7FFFFFFF.
  // Set negative lanes to 0.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vsubps(tmp, dst.fp(), liftoff::kScratchDoubleReg);
  } else {
    movaps(tmp, dst.fp());
    subps(tmp, liftoff::kScratchDoubleReg);
  }
  Cmpleps(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg, tmp);
  Cvttps2dq(tmp, tmp);
  Pxor(tmp, liftoff::kScratchDoubleReg);
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);
  Pmaxsd(tmp, tmp, liftoff::kScratchDoubleReg);
  // Convert to int. Overflow lanes above max_signed will be 0x80000000.
  Cvttps2dq(dst.fp(), dst.fp());
  // Add (src-max_signed) for overflow lanes.
  Paddd(dst.fp(), dst.fp(), tmp);
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Cvtdq2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  Pxor(liftoff::kScratchDoubleReg, liftoff::kScratchDoubleReg);  // Zeros.
  Pblendw(liftoff::kScratchDoubleReg, src.fp(),
          uint8_t{0x55});  // Get lo 16 bits.
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsubd(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);  // Get hi 16 bits.
  } else {
    if (dst.fp() != src.fp()) movaps(dst.fp(), src.fp());
    psubd(dst.fp(), liftoff::kScratchDoubleReg);
  }
  Cvtdq2ps(liftoff::kScratchDoubleReg,
           liftoff::kScratchDoubleReg);  // Convert lo exactly.
  Psrld(dst.fp(), dst.fp(), byte{1});   // Divide by 2 to get in unsigned range.
  Cvtdq2ps(dst.fp(), dst.fp());         // Convert hi, exactly.
  Addps(dst.fp(), dst.fp(), dst.fp());  // Double hi, exactly.
  Addps(dst.fp(), dst.fp(),
        liftoff::kScratchDoubleReg);  // Add hi and lo, may round.
}

void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  Cvtpd2ps(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpacksswb,
                                       &Assembler::packsswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackuswb,
                                       &Assembler::packuswb>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackssdw,
                                       &Assembler::packssdw>(this, dst, lhs,
                                                             rhs);
}

void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vpackusdw,
                                       &Assembler::packusdw>(this, dst, lhs,
                                                             rhs, SSE4_1);
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8SConvertI8x16High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxbw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I16x8UConvertI8x16High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovsxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4SConvertI16x8High(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  Pmovzxwd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  I32x4UConvertI16x8High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2SZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  Register tmp = GetUnusedRegister(kGpReg, {}).gp();
  I32x4TruncSatF64x2UZero(dst.fp(), src.fp(), liftoff::kScratchDoubleReg, tmp);
}

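// Note: {andnps} computes ~first & second, so the operands are passed in
// swapped order below to get the wasm semantics lhs & ~rhs.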
void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  liftoff::EmitSimdNonCommutativeBinOp<&Assembler::vandnps, &Assembler::andnps>(
      this, dst, rhs, lhs);
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgb, &Assembler::pavgb>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  liftoff::EmitSimdCommutativeBinOp<&Assembler::vpavgw, &Assembler::pavgw>(
      this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsb(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsw(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  Pabsd(dst.fp(), src.fp());
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Register byte_reg = liftoff::GetTmpByteRegister(this, dst.gp());
  Pextrb(byte_reg, lhs.fp(), imm_lane_idx);
  movsx_b(dst.gp(), byte_reg);
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrb(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
  movsx_w(dst.gp(), dst.gp());
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  Pextrw(dst.gp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.gp(), lhs.fp(), imm_lane_idx);
}

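// On ia32 an i64 value lives in a pair of 32-bit GP registers (low/high), so
// i64x2 lane accesses are done as two 32-bit {pextrd}/{pinsrd} operations per
// lane.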
void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  Pextrd(dst.low_gp(), lhs.fp(), imm_lane_idx * 2);
  Pextrd(dst.high_gp(), lhs.fp(), imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F32x4ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  F64x2ExtractLane(dst.fp(), lhs.fp(), imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrb(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrb(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrw(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrw(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.gp(), imm_lane_idx);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.gp(), imm_lane_idx);
  }
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpinsrd(dst.fp(), src1.fp(), src2.low_gp(), imm_lane_idx * 2);
    vpinsrd(dst.fp(), dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    pinsrd(dst.fp(), src2.low_gp(), imm_lane_idx * 2);
    pinsrd(dst.fp(), src2.high_gp(), imm_lane_idx * 2 + 1);
  }
}

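// The {insertps} immediate encodes the destination lane in bits 5:4, hence
// (imm_lane_idx << 4) & 0x30 below; the source-lane and zero-mask bits stay 0.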
void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vinsertps(dst.fp(), src1.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  } else {
    CpuFeatureScope scope(this, SSE4_1);
    if (dst.fp() != src1.fp()) movaps(dst.fp(), src1.fp());
    insertps(dst.fp(), src2.fp(), (imm_lane_idx << 4) & 0x30);
  }
}

void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  F64x2ReplaceLane(dst.fp(), src1.fp(), src2.fp(), imm_lane_idx);
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  cmp(esp, Operand(limit_address, 0));
  j(below_equal, ool_code);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0, GetUnusedRegister(kGpReg, {}).gp());
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  TurboAssembler::AssertUnreachable(reason);
}

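// GP registers are pushed lowest-numbered first here and popped
// highest-numbered first in {PopRegisters}, so both functions agree on the
// same stack layout.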
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetFirstRegSet();
    push(reg.gp());
    gp_regs.clear(reg);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned num_fp_regs = fp_regs.GetNumRegsSet();
  if (num_fp_regs) {
    AllocateStackSpace(num_fp_regs * kSimd128Size);
    unsigned offset = 0;
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      Movdqu(Operand(esp, offset), reg.fp());
      fp_regs.clear(reg);
      offset += kSimd128Size;
    }
    DCHECK_EQ(offset, num_fp_regs * kSimd128Size);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  unsigned fp_offset = 0;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    Movdqu(reg.fp(), Operand(esp, fp_offset));
    fp_regs.clear(reg);
    fp_offset += kSimd128Size;
  }
  if (fp_offset) add(esp, Immediate(fp_offset));
  LiftoffRegList gp_regs = regs & kGpCacheRegList;
  while (!gp_regs.is_empty()) {
    LiftoffRegister reg = gp_regs.GetLastRegSet();
    pop(reg.gp());
    gp_regs.clear(reg);
  }
}

void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  int spill_space_size = 0;
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetFirstRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  DCHECK_LT(num_stack_slots,
            (1 << 16) / kSystemPointerSize);  // 16 bit immediate
  ret(static_cast<int>(num_stack_slots * kSystemPointerSize));
}

void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    liftoff::Store(this, esp, arg_bytes, *args++, param_kind);
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  constexpr Register kScratch = eax;
  constexpr Register kArgumentBuffer = ecx;
  constexpr int kNumCCallArgs = 1;
  mov(kArgumentBuffer, esp);
  PrepareCallCFunction(kNumCCallArgs, kScratch);

  // Pass a pointer to the buffer with the arguments to the C function. ia32
  // does not use registers here, so push to the stack.
  mov(Operand(esp, 0), kArgumentBuffer);

  // Now call the C function.
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* next_result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = eax;
    if (kReturnReg != next_result_reg->gp()) {
      Move(*next_result_reg, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    ++next_result_reg;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    liftoff::Load(this, *next_result_reg, esp, 0, out_argument_kind);
  }

  add(esp, Immediate(stack_bytes));
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  wasm_call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  jmp(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  // Since we have more cache registers than parameter registers, the
  // {LiftoffCompiler} should always be able to place {target} in a register.
  DCHECK(target.is_valid());
  jmp(target);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  wasm_call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  mov(addr, esp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  add(esp, Immediate(size));
}

void LiftoffAssembler::MaybeOSR() {}

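// {ucomiss}/{ucomisd} compare a value with itself; the result is unordered
// (parity flag set) exactly when the value is NaN, so the parity_odd jump
// below skips the store for non-NaN values.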
void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
  if (kind == kF32) {
    ucomiss(src, src);
  } else {
    DCHECK_EQ(kind, kF64);
    ucomisd(src, src);
  }
  Label ret;
  j(parity_odd, &ret);
  mov(Operand(dst, 0), Immediate(1));
  bind(&ret);
}

void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  if (lane_kind == kF32) {
    movaps(tmp_s128.fp(), src.fp());
    cmpunordps(tmp_s128.fp(), tmp_s128.fp());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    movapd(tmp_s128.fp(), src.fp());
    cmpunordpd(tmp_s128.fp(), tmp_s128.fp());
  }
  pmovmskb(tmp_gp, tmp_s128.fp());
  or_(Operand(dst, 0), tmp_gp);
}

void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    DCHECK_LT(0, stack_decrement);
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack:
        // The combination of AllocateStackSpace and 2 movdqu is usually smaller
        // in code size than doing 4 pushes.
        if (src.kind() == kS128) {
          asm_->AllocateStackSpace(stack_decrement);
          asm_->movdqu(liftoff::kScratchDoubleReg,
                       liftoff::GetStackSlot(slot.src_offset_));
          asm_->movdqu(Operand(esp, 0), liftoff::kScratchDoubleReg);
          break;
        }
        if (src.kind() == kF64) {
          asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
          DCHECK_EQ(kLowWord, slot.half_);
          asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, kHighWord));
          stack_decrement = kSystemPointerSize;
        }
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        asm_->push(liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
        break;
      case LiftoffAssembler::VarState::kRegister:
        if (src.kind() == kI64) {
          liftoff::push(
              asm_, slot.half_ == kLowWord ? src.reg().low() : src.reg().high(),
              kI32, stack_decrement - kSystemPointerSize);
        } else {
          int pushed_bytes = SlotSizeInBytes(slot);
          liftoff::push(asm_, src.reg(), src.kind(),
                        stack_decrement - pushed_bytes);
        }
        break;
      case LiftoffAssembler::VarState::kIntConst:
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        // The high word is the sign extension of the low word.
        asm_->push(Immediate(slot.half_ == kLowWord ? src.i32_const()
                                                    : src.i32_const() >> 31));
        break;
    }
  }
}

#undef RETURN_FALSE_IF_MISSING_CPU_FEATURE

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_IA32_LIFTOFF_ASSEMBLER_IA32_H_