1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
6 #define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
7 
8 #include "src/base/platform/wrappers.h"
9 #include "src/codegen/arm/register-arm.h"
10 #include "src/heap/memory-chunk.h"
11 #include "src/wasm/baseline/liftoff-assembler.h"
12 #include "src/wasm/baseline/liftoff-register.h"
13 #include "src/wasm/wasm-objects.h"
14 
15 namespace v8 {
16 namespace internal {
17 namespace wasm {
18 
19 namespace liftoff {
20 
21 inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
22   switch (liftoff_cond) {
23     case kEqual:
24       return eq;
25     case kUnequal:
26       return ne;
27     case kSignedLessThan:
28       return lt;
29     case kSignedLessEqual:
30       return le;
31     case kSignedGreaterThan:
32       return gt;
33     case kSignedGreaterEqual:
34       return ge;
35     case kUnsignedLessThan:
36       return lo;
37     case kUnsignedLessEqual:
38       return ls;
39     case kUnsignedGreaterThan:
40       return hi;
41     case kUnsignedGreaterEqual:
42       return hs;
43   }
44 }
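// For example, ToCondition(kUnsignedLessThan) yields the ARM "lo" (unsigned
// lower) condition, so an unsigned wasm i32 comparison can be emitted as a
// plain cmp followed by a conditional instruction.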
45 
46 //  half
47 //  slot        Frame
48 //  -----+--------------------+---------------------------
49 //  n+3  |   parameter n      |
50 //  ...  |       ...          |
51 //   4   |   parameter 1      | or parameter 2
52 //   3   |   parameter 0      | or parameter 1
53 //   2   |  (result address)  | or parameter 0
54 //  -----+--------------------+---------------------------
55 //   1   | return addr (lr)   |
56 //   0   | previous frame (fp)|
57 //  -----+--------------------+  <-- frame ptr (fp)
58 //  -1   | StackFrame::WASM   |
59 //  -2   |    instance        |
60 //  -3   |    feedback vector |
61 //  -4   |    tiering budget  |
62 //  -----+--------------------+---------------------------
63 //  -5   |    slot 0 (high)   |   ^
64 //  -6   |    slot 0 (low)    |   |
65 //  -7   |    slot 1 (high)   | Frame slots
66 //  -8   |    slot 1 (low)    |   |
67 //       |                    |   v
68 //  -----+--------------------+  <-- stack ptr (sp)
69 //
70 static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
71               "Slot size should be twice the size of the 32 bit pointer.");
72 constexpr int kInstanceOffset = 2 * kSystemPointerSize;
73 constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
74 constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
75 // kPatchInstructionsRequired sets an upper limit on the number of instructions
76 // that PatchPrepareStackFrame will use to grow the stack frame appropriately.
77 // Three instructions are required to sub a large constant, movw + movt + sub.
78 constexpr int32_t kPatchInstructionsRequired = 3;
79 constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;
80 
81 inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }
82 
83 inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
84   int32_t half_offset =
85       half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
86   return MemOperand(offset > 0 ? fp : sp, -offset + half_offset);
87 }
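// Worked example (illustrative; on this 32-bit target kSystemPointerSize == 4,
// so kStackSlotSize == 8): kInstanceOffset == 8, kFeedbackVectorOffset == 12
// and kTierupBudgetOffset == 16. For a hypothetical i64 spill slot recorded at
// offset 24, GetHalfStackSlot(24, kLowWord) is [fp, #-24] and
// GetHalfStackSlot(24, kHighWord) is [fp, #-20], matching the layout above.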
88 
89 inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }
90 
91 inline MemOperand GetMemOp(LiftoffAssembler* assm,
92                            UseScratchRegisterScope* temps, Register addr,
93                            Register offset, int32_t offset_imm) {
94   if (offset != no_reg) {
95     if (offset_imm == 0) return MemOperand(addr, offset);
96     Register tmp = temps->Acquire();
97     assm->add(tmp, offset, Operand(offset_imm));
98     return MemOperand(addr, tmp);
99   }
100   return MemOperand(addr, offset_imm);
101 }
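// Example (with illustrative registers): GetMemOp(assm, &temps, r0, r1, 0x100)
// cannot fold both the index register and the immediate into one operand, so
// it emits add(scratch, r1, Operand(0x100)) and returns [r0, scratch]; with
// offset == no_reg it simply returns the immediate form [r0, #0x100].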
102 
103 inline Register CalculateActualAddress(LiftoffAssembler* assm,
104                                        UseScratchRegisterScope* temps,
105                                        Register addr_reg, Register offset_reg,
106                                        uintptr_t offset_imm,
107                                        Register result_reg = no_reg) {
108   if (offset_reg == no_reg && offset_imm == 0) {
109     if (result_reg == no_reg) {
110       return addr_reg;
111     } else {
112       assm->mov(result_reg, addr_reg);
113       return result_reg;
114     }
115   }
116   Register actual_addr_reg =
117       result_reg != no_reg ? result_reg : temps->Acquire();
118   if (offset_reg == no_reg) {
119     assm->add(actual_addr_reg, addr_reg, Operand(offset_imm));
120   } else {
121     assm->add(actual_addr_reg, addr_reg, Operand(offset_reg));
122     if (offset_imm != 0) {
123       assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm));
124     }
125   }
126   return actual_addr_reg;
127 }
128 
129 inline LiftoffCondition MakeUnsigned(LiftoffCondition cond) {
130   switch (cond) {
131     case kSignedLessThan:
132       return kUnsignedLessThan;
133     case kSignedLessEqual:
134       return kUnsignedLessEqual;
135     case kSignedGreaterThan:
136       return kUnsignedGreaterThan;
137     case kSignedGreaterEqual:
138       return kUnsignedGreaterEqual;
139     case kEqual:
140     case kUnequal:
141     case kUnsignedLessThan:
142     case kUnsignedLessEqual:
143     case kUnsignedGreaterThan:
144     case kUnsignedGreaterEqual:
145       return cond;
146     default:
147       UNREACHABLE();
148   }
149 }
150 
151 template <void (Assembler::*op)(Register, Register, Register, SBit, Condition),
152           void (Assembler::*op_with_carry)(Register, Register, const Operand&,
153                                            SBit, Condition)>
154 inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst,
155                      LiftoffRegister lhs, LiftoffRegister rhs) {
156   Register dst_low = dst.low_gp();
157   if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) {
158     dst_low =
159         assm->GetUnusedRegister(kGpReg, LiftoffRegList{lhs, rhs, dst.high_gp()})
160             .gp();
161   }
162   (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al);
163   (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()),
164                          LeaveCC, al);
165   if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low);
166 }
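// Example: instantiated as I64Binop<&Assembler::add, &Assembler::adc> (as done
// for 64-bit addition further down in this file), this emits an "adds" on the
// low words that sets the carry flag and an "adc" on the high words that
// consumes it; the {dst_low} shuffling above only avoids clobbering a
// high-word input before it has been read.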
167 
168 template <void (Assembler::*op)(Register, Register, const Operand&, SBit,
169                                 Condition),
170           void (Assembler::*op_with_carry)(Register, Register, const Operand&,
171                                            SBit, Condition)>
172 inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst,
173                       LiftoffRegister lhs, int64_t imm) {
174   // The compiler allocated registers such that either {dst == lhs} or there is
175   // no overlap between the two.
176   DCHECK_NE(dst.low_gp(), lhs.high_gp());
177   int32_t imm_low_word = static_cast<int32_t>(imm);
178   int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
179   (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm_low_word), SetCC, al);
180   (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(imm_high_word),
181                          LeaveCC, al);
182 }
183 
184 template <void (TurboAssembler::*op)(Register, Register, Register, Register,
185                                      Register),
186           bool is_left_shift>
187 inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst,
188                        LiftoffRegister src, Register amount) {
189   Register src_low = src.low_gp();
190   Register src_high = src.high_gp();
191   Register dst_low = dst.low_gp();
192   Register dst_high = dst.high_gp();
193   // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low}
194   // then {dst_high}.
195   Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low;
196   LiftoffRegList pinned = {clobbered_dst_reg, src};
197   Register amount_capped =
198       pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp();
199   assm->and_(amount_capped, amount, Operand(0x3F));
200 
201   // Ensure that writing the first half of {dst} does not overwrite the still
202   // needed half of {src}.
203   Register* later_src_reg = is_left_shift ? &src_low : &src_high;
204   if (*later_src_reg == clobbered_dst_reg) {
205     *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
206     assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg);
207   }
208 
209   (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped);
210 }
211 
212 inline FloatRegister GetFloatRegister(DoubleRegister reg) {
213   DCHECK_LT(reg.code(), kDoubleCode_d16);
214   return LowDwVfpRegister::from_code(reg.code()).low();
215 }
216 
217 inline Simd128Register GetSimd128Register(DoubleRegister reg) {
218   return QwNeonRegister::from_code(reg.code() / 2);
219 }
220 
221 inline Simd128Register GetSimd128Register(LiftoffRegister reg) {
222   return liftoff::GetSimd128Register(reg.low_fp());
223 }
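// These helpers exploit ARM's register aliasing: every S register overlaps
// half of a D register in d0-d15, and every Q register overlaps a consecutive
// D pair. For example, for d4 GetFloatRegister returns s8 and
// GetSimd128Register returns q2 (register code 4 / 2).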
224 
225 enum class MinOrMax : uint8_t { kMin, kMax };
226 template <typename RegisterType>
227 inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst,
228                               RegisterType lhs, RegisterType rhs,
229                               MinOrMax min_or_max) {
230   DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8);
231   if (lhs == rhs) {
232     assm->TurboAssembler::Move(dst, lhs);
233     return;
234   }
235   Label done, is_nan;
236   if (min_or_max == MinOrMax::kMin) {
237     assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan);
238   } else {
239     assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan);
240   }
241   assm->b(&done);
242   assm->bind(&is_nan);
243   // Create a NaN output.
244   assm->vadd(dst, lhs, rhs);
245   assm->bind(&done);
246 }
247 
248 inline Register EnsureNoAlias(Assembler* assm, Register reg,
249                               Register must_not_alias,
250                               UseScratchRegisterScope* temps) {
251   if (reg != must_not_alias) return reg;
252   Register tmp = temps->Acquire();
253   DCHECK_NE(reg, tmp);
254   assm->mov(tmp, reg);
255   return tmp;
256 }
257 
258 inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt,
259                          NeonDataType sdt, LiftoffRegister dst,
260                          LiftoffRegister lhs, LiftoffRegister rhs) {
261   if (dst == lhs) {
262     assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
263     assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
264   } else {
265     assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
266     assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
267   }
268 }
269 
270 inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst,
271                          LiftoffRegister lhs, LiftoffRegister rhs,
272                          Condition cond) {
273   DCHECK(cond == eq || cond == ne || cond == lt || cond == le);
274 
275   QwNeonRegister dest = liftoff::GetSimd128Register(dst);
276   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
277   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
278   UseScratchRegisterScope temps(assm);
279   Register scratch = temps.Acquire();
280 
281   assm->mov(scratch, Operand(0));
282   assm->VFPCompareAndSetFlags(left.low(), right.low());
283   assm->mov(scratch, Operand(-1), LeaveCC, cond);
284   if (cond == lt || cond == le) {
285     // Check for NaN.
286     assm->mov(scratch, Operand(0), LeaveCC, vs);
287   }
288   assm->vmov(dest.low(), scratch, scratch);
289 
290   assm->mov(scratch, Operand(0));
291   assm->VFPCompareAndSetFlags(left.high(), right.high());
292   assm->mov(scratch, Operand(-1), LeaveCC, cond);
293   if (cond == lt || cond == le) {
294     // Check for NaN.
295     assm->mov(scratch, Operand(0), LeaveCC, vs);
296   }
297   assm->vmov(dest.high(), scratch, scratch);
298 }
299 
300 inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst,
301                   ValueKind kind) {
302 #ifdef DEBUG
303   // The {str} instruction needs a temp register when the immediate in the
304   // provided MemOperand does not fit into 12 bits. This happens for large stack
305   // frames. This DCHECK checks that the temp register is available when needed.
306   DCHECK(UseScratchRegisterScope{assm}.CanAcquire());
307 #endif
308   switch (kind) {
309     case kI32:
310     case kOptRef:
311     case kRef:
312     case kRtt:
313       assm->str(src.gp(), dst);
314       break;
315     case kI64:
316       // Positive offsets should be lowered to kI32.
317       assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset()));
318       assm->str(
319           src.high_gp(),
320           MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize));
321       break;
322     case kF32:
323       assm->vstr(liftoff::GetFloatRegister(src.fp()), dst);
324       break;
325     case kF64:
326       assm->vstr(src.fp(), dst);
327       break;
328     case kS128: {
329       UseScratchRegisterScope temps(assm);
330       Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(),
331                                                       no_reg, dst.offset());
332       assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr));
333       break;
334     }
335     default:
336       UNREACHABLE();
337   }
338 }
339 
340 inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
341                  ValueKind kind) {
342   switch (kind) {
343     case kI32:
344     case kOptRef:
345     case kRef:
346     case kRtt:
347       assm->ldr(dst.gp(), src);
348       break;
349     case kI64:
350       assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset()));
351       assm->ldr(
352           dst.high_gp(),
353           MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize));
354       break;
355     case kF32:
356       assm->vldr(liftoff::GetFloatRegister(dst.fp()), src);
357       break;
358     case kF64:
359       assm->vldr(dst.fp(), src);
360       break;
361     case kS128: {
362       // Get memory address of slot to fill from.
363       UseScratchRegisterScope temps(assm);
364       Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(),
365                                                       no_reg, src.offset());
366       assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
367       break;
368     }
369     default:
370       UNREACHABLE();
371   }
372 }
373 
374 constexpr int MaskFromNeonDataType(NeonDataType dt) {
375   switch (dt) {
376     case NeonS8:
377     case NeonU8:
378       return 7;
379     case NeonS16:
380     case NeonU16:
381       return 15;
382     case NeonS32:
383     case NeonU32:
384       return 31;
385     case NeonS64:
386     case NeonU64:
387       return 63;
388   }
389 }
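// Example: for NeonS16 the mask is 15, so a variable i16x8 shift amount of 18
// is reduced to 18 & 15 == 2, which matches wasm's shift-modulo-lane-width
// semantics relied on by EmitSimdShift and EmitSimdShiftImmediate below.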
390 
391 enum ShiftDirection { kLeft, kRight };
392 
393 template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz>
394 inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
395                           LiftoffRegister lhs, LiftoffRegister rhs) {
396   constexpr int mask = MaskFromNeonDataType(dt);
397   UseScratchRegisterScope temps(assm);
398   QwNeonRegister tmp = temps.AcquireQ();
399   Register shift = temps.Acquire();
400   assm->and_(shift, rhs.gp(), Operand(mask));
401   assm->vdup(sz, tmp, shift);
402   if (dir == kRight) {
403     assm->vneg(sz, tmp, tmp);
404   }
405   assm->vshl(dt, liftoff::GetSimd128Register(dst),
406              liftoff::GetSimd128Register(lhs), tmp);
407 }
408 
409 template <ShiftDirection dir, NeonDataType dt>
410 inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
411                                    LiftoffRegister lhs, int32_t rhs) {
412   // vshr by 0 is not allowed, so check for it, and only move if dst != lhs.
413   int32_t shift = rhs & MaskFromNeonDataType(dt);
414   if (shift) {
415     if (dir == kLeft) {
416       assm->vshl(dt, liftoff::GetSimd128Register(dst),
417                  liftoff::GetSimd128Register(lhs), shift);
418     } else {
419       assm->vshr(dt, liftoff::GetSimd128Register(dst),
420                  liftoff::GetSimd128Register(lhs), shift);
421     }
422   } else if (dst != lhs) {
423     assm->vmov(liftoff::GetSimd128Register(dst),
424                liftoff::GetSimd128Register(lhs));
425   }
426 }
427 
428 inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
429                         LiftoffRegister src) {
430   UseScratchRegisterScope temps(assm);
431   DwVfpRegister scratch = temps.AcquireD();
432   assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
433   assm->vpmax(NeonU32, scratch, scratch, scratch);
434   assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
435   assm->cmp(dst.gp(), Operand(0));
436   assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
437 }
438 
439 }  // namespace liftoff
440 
441 int LiftoffAssembler::PrepareStackFrame() {
442   if (!CpuFeatures::IsSupported(ARMv7)) {
443     bailout(kUnsupportedArchitecture, "Liftoff needs ARMv7");
444     return 0;
445   }
446   uint32_t offset = static_cast<uint32_t>(pc_offset());
447   // PatchPrepareStackFrame will patch this in order to increase the stack
448   // appropriately. Additional nops are required because the frame-size operand
449   // might require extra moves to encode.
450   for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) {
451     nop();
452   }
453   DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize,
454             pc_offset());
455   return offset;
456 }
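// Note: the three nops reserved above are later overwritten by
// PatchPrepareStackFrame, either with a single "sub sp, sp, #frame_size" plus
// padding nops (small frames) or with a branch to out-of-line code that
// performs a stack check before allocating a frame of 4KB or more.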
457 
458 void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
459                                        int stack_param_delta) {
460   UseScratchRegisterScope temps(this);
461   Register scratch = temps.Acquire();
462 
463   // Push the return address and frame pointer to complete the stack frame.
464   sub(sp, sp, Operand(8));
465   ldr(scratch, MemOperand(fp, 4));
466   str(scratch, MemOperand(sp, 4));
467   ldr(scratch, MemOperand(fp, 0));
468   str(scratch, MemOperand(sp, 0));
469 
470   // Shift the whole frame upwards.
471   int slot_count = num_callee_stack_params + 2;
472   for (int i = slot_count - 1; i >= 0; --i) {
473     ldr(scratch, MemOperand(sp, i * 4));
474     str(scratch, MemOperand(fp, (i - stack_param_delta) * 4));
475   }
476 
477   // Set the new stack and frame pointer.
478   sub(sp, fp, Operand(stack_param_delta * 4));
479   Pop(lr, fp);
480 }
481 
482 void LiftoffAssembler::AlignFrameSize() {}
483 
484 void LiftoffAssembler::PatchPrepareStackFrame(
485     int offset, SafepointTableBuilder* safepoint_table_builder) {
486   // The frame_size includes the frame marker and the instance slot. Both are
487   // pushed as part of frame construction, so we don't need to allocate memory
488   // for them anymore.
489   int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;
490 
491   PatchingAssembler patching_assembler(AssemblerOptions{},
492                                        buffer_start_ + offset,
493                                        liftoff::kPatchInstructionsRequired);
494   if (V8_LIKELY(frame_size < 4 * KB)) {
495     // This is the standard case for small frames: just subtract from SP and be
496     // done with it.
497     patching_assembler.sub(sp, sp, Operand(frame_size));
498     patching_assembler.PadWithNops();
499     return;
500   }
501 
502   // The frame size is bigger than 4KB, so we might overflow the available stack
503   // space if we first allocate the frame and then do the stack check (we will
504   // need some remaining stack space for throwing the exception). That's why we
505   // check the available stack space before we allocate the frame. To do this we
506   // replace the {__ sub(sp, sp, framesize)} with a jump to OOL code that does
507   // this "extended stack check".
508   //
509   // The OOL code can simply be generated here with the normal assembler,
510   // because all other code generation, including OOL code, has already finished
511   // when {PatchPrepareStackFrame} is called. The function prologue then jumps
512   // to the current {pc_offset()} to execute the OOL code for allocating the
513   // large frame.
514 
515   // Emit the unconditional branch in the function prologue (from {offset} to
516   // {pc_offset()}).
517   patching_assembler.b(pc_offset() - offset - Instruction::kPcLoadDelta);
518   patching_assembler.PadWithNops();
519 
520   // If the frame is bigger than the stack, we throw the stack overflow
521   // exception unconditionally. Thereby we can avoid the integer overflow
522   // check in the condition code.
523   RecordComment("OOL: stack check for large frame");
524   Label continuation;
525   if (frame_size < FLAG_stack_size * 1024) {
526     UseScratchRegisterScope temps(this);
527     Register stack_limit = temps.Acquire();
528     ldr(stack_limit,
529         FieldMemOperand(kWasmInstanceRegister,
530                         WasmInstanceObject::kRealStackLimitAddressOffset));
531     ldr(stack_limit, MemOperand(stack_limit));
532     add(stack_limit, stack_limit, Operand(frame_size));
533     cmp(sp, stack_limit);
534     b(cs /* higher or same */, &continuation);
535   }
536 
537   Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
538   // The call will not return; just define an empty safepoint.
539   safepoint_table_builder->DefineSafepoint(this);
540   if (FLAG_debug_code) stop();
541 
542   bind(&continuation);
543 
544   // Now allocate the stack space. Note that this might do more than just
545   // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
546   AllocateStackSpace(frame_size);
547 
548   // Jump back to the start of the function, from {pc_offset()} to
549   // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
550   // is a branch now).
551   int func_start_offset =
552       offset + liftoff::kPatchInstructionsRequired * kInstrSize;
553   b(func_start_offset - pc_offset() - Instruction::kPcLoadDelta);
554 }
555 
556 void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); }
557 
558 void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }
559 
560 // static
561 constexpr int LiftoffAssembler::StaticStackFrameSize() {
562   return liftoff::kTierupBudgetOffset;
563 }
564 
565 int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
566   switch (kind) {
567     case kS128:
568       return value_kind_size(kind);
569     default:
570       return kStackSlotSize;
571   }
572 }
573 
574 bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
575   return kind == kS128 || is_reference(kind);
576 }
577 
578 void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
579                                     RelocInfo::Mode rmode) {
580   switch (value.type().kind()) {
581     case kI32:
582       TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode));
583       break;
584     case kI64: {
585       DCHECK(RelocInfo::IsNoInfo(rmode));
586       int32_t low_word = value.to_i64();
587       int32_t high_word = value.to_i64() >> 32;
588       TurboAssembler::Move(reg.low_gp(), Operand(low_word));
589       TurboAssembler::Move(reg.high_gp(), Operand(high_word));
590       break;
591     }
592     case kF32:
593       vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
594       break;
595     case kF64: {
596       Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
597       vmov(reg.fp(), base::Double(value.to_f64_boxed().get_bits()),
598            extra_scratch);
599       break;
600     }
601     default:
602       UNREACHABLE();
603   }
604 }
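// Example: loading the i64 constant 0x100000002 moves 2 into reg.low_gp() and
// 1 into reg.high_gp(); f64 constants get an extra GP scratch register that
// vmov can use to materialize the 64-bit bit pattern when it cannot be encoded
// as an immediate.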
605 
606 void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
607   ldr(dst, liftoff::GetInstanceOperand());
608 }
609 
610 void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
611                                         int offset, int size) {
612   DCHECK_LE(0, offset);
613   MemOperand src{instance, offset};
614   switch (size) {
615     case 1:
616       ldrb(dst, src);
617       break;
618     case 4:
619       ldr(dst, src);
620       break;
621     default:
622       UNIMPLEMENTED();
623   }
624 }
625 
626 void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
627                                                      Register instance,
628                                                      int offset) {
629   STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
630   ldr(dst, MemOperand{instance, offset});
631 }
632 
633 void LiftoffAssembler::SpillInstance(Register instance) {
634   str(instance, liftoff::GetInstanceOperand());
635 }
636 
637 void LiftoffAssembler::ResetOSRTarget() {}
638 
639 namespace liftoff {
640 #define __ lasm->
641 inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst,
642                          Register src_addr, Register offset_reg,
643                          int32_t offset_imm, LoadType type,
644                          LiftoffRegList pinned,
645                          uint32_t* protected_load_pc = nullptr,
646                          bool is_load_mem = false) {
647   DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair());
648   UseScratchRegisterScope temps(lasm);
649   if (type.value() == LoadType::kF64Load ||
650       type.value() == LoadType::kF32Load ||
651       type.value() == LoadType::kS128Load) {
652     Register actual_src_addr = liftoff::CalculateActualAddress(
653         lasm, &temps, src_addr, offset_reg, offset_imm);
654     if (type.value() == LoadType::kF64Load) {
655       // Armv6 is not supported so Neon can be used to avoid alignment issues.
656       CpuFeatureScope scope(lasm, NEON);
657       __ vld1(Neon64, NeonListOperand(dst.fp()),
658               NeonMemOperand(actual_src_addr));
659     } else if (type.value() == LoadType::kF32Load) {
660       // TODO(arm): Use vld1 for f32 when implemented in simulator as used for
661       // f64. It supports unaligned access.
662       Register scratch =
663           (actual_src_addr == src_addr) ? temps.Acquire() : actual_src_addr;
664       __ ldr(scratch, MemOperand(actual_src_addr));
665       __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch);
666     } else {
667       // Armv6 is not supported so Neon can be used to avoid alignment issues.
668       CpuFeatureScope scope(lasm, NEON);
669       __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2),
670               NeonMemOperand(actual_src_addr));
671     }
672   } else {
673     MemOperand src_op =
674         liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm);
675     if (protected_load_pc) *protected_load_pc = __ pc_offset();
676     switch (type.value()) {
677       case LoadType::kI32Load8U:
678         __ ldrb(dst.gp(), src_op);
679         break;
680       case LoadType::kI64Load8U:
681         __ ldrb(dst.low_gp(), src_op);
682         __ mov(dst.high_gp(), Operand(0));
683         break;
684       case LoadType::kI32Load8S:
685         __ ldrsb(dst.gp(), src_op);
686         break;
687       case LoadType::kI64Load8S:
688         __ ldrsb(dst.low_gp(), src_op);
689         __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
690         break;
691       case LoadType::kI32Load16U:
692         __ ldrh(dst.gp(), src_op);
693         break;
694       case LoadType::kI64Load16U:
695         __ ldrh(dst.low_gp(), src_op);
696         __ mov(dst.high_gp(), Operand(0));
697         break;
698       case LoadType::kI32Load16S:
699         __ ldrsh(dst.gp(), src_op);
700         break;
701       case LoadType::kI32Load:
702         __ ldr(dst.gp(), src_op);
703         break;
704       case LoadType::kI64Load16S:
705         __ ldrsh(dst.low_gp(), src_op);
706         __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
707         break;
708       case LoadType::kI64Load32U:
709         __ ldr(dst.low_gp(), src_op);
710         __ mov(dst.high_gp(), Operand(0));
711         break;
712       case LoadType::kI64Load32S:
713         __ ldr(dst.low_gp(), src_op);
714         __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
715         break;
716       case LoadType::kI64Load:
717         __ ldr(dst.low_gp(), src_op);
718         // GetMemOp may use a scratch register as the offset register, in which
719         // case, calling GetMemOp again will fail due to the assembler having
720         // run out of scratch registers.
721         if (temps.CanAcquire()) {
722           src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg,
723                                      offset_imm + kSystemPointerSize);
724         } else {
725           __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize));
726         }
727         __ ldr(dst.high_gp(), src_op);
728         break;
729       default:
730         UNREACHABLE();
731     }
732   }
733 }
734 #undef __
735 }  // namespace liftoff
736 
737 void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
738                                          Register offset_reg,
739                                          int32_t offset_imm,
740                                          LiftoffRegList pinned) {
741   STATIC_ASSERT(kTaggedSize == kInt32Size);
742   liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg,
743                         offset_imm, LoadType::kI32Load, pinned);
744 }
745 
746 void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
747                                        int32_t offset_imm) {
748   UseScratchRegisterScope temps(this);
749   MemOperand src_op =
750       liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
751   ldr(dst, src_op);
752 }
753 
754 void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
755                                           Register offset_reg,
756                                           int32_t offset_imm,
757                                           LiftoffRegister src,
758                                           LiftoffRegList pinned,
759                                           SkipWriteBarrier skip_write_barrier) {
760   STATIC_ASSERT(kTaggedSize == kInt32Size);
761   Register actual_offset_reg = offset_reg;
762   if (offset_reg != no_reg && offset_imm != 0) {
763     if (cache_state()->is_used(LiftoffRegister(offset_reg))) {
764       actual_offset_reg = GetUnusedRegister(kGpReg, pinned).gp();
765     }
766     add(actual_offset_reg, offset_reg, Operand(offset_imm));
767   }
768   MemOperand dst_op = actual_offset_reg == no_reg
769                           ? MemOperand(dst_addr, offset_imm)
770                           : MemOperand(dst_addr, actual_offset_reg);
771   str(src.gp(), dst_op);
772 
773   if (skip_write_barrier || FLAG_disable_write_barriers) return;
774 
775   // The write barrier.
776   Label write_barrier;
777   Label exit;
778   CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne,
779                 &write_barrier);
780   b(&exit);
781   bind(&write_barrier);
782   JumpIfSmi(src.gp(), &exit);
783   CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq,
784                 &exit);
785   CallRecordWriteStubSaveRegisters(
786       dst_addr,
787       actual_offset_reg == no_reg ? Operand(offset_imm)
788                                   : Operand(actual_offset_reg),
789       RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
790       StubCallMode::kCallWasmRuntimeStub);
791   bind(&exit);
792 }
793 
794 void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
795                             Register offset_reg, uint32_t offset_imm,
796                             LoadType type, LiftoffRegList pinned,
797                             uint32_t* protected_load_pc, bool is_load_mem,
798                             bool i64_offset) {
799   // Offsets >=2GB are statically OOB on 32-bit systems.
800   DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
801   liftoff::LoadInternal(this, dst, src_addr, offset_reg,
802                         static_cast<int32_t>(offset_imm), type, pinned,
803                         protected_load_pc, is_load_mem);
804 }
805 
806 void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
807                              uint32_t offset_imm, LiftoffRegister src,
808                              StoreType type, LiftoffRegList pinned,
809                              uint32_t* protected_store_pc, bool is_store_mem) {
810   // Offsets >=2GB are statically OOB on 32-bit systems.
811   DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
812   UseScratchRegisterScope temps(this);
813   if (type.value() == StoreType::kF64Store) {
814     Register actual_dst_addr = liftoff::CalculateActualAddress(
815         this, &temps, dst_addr, offset_reg, offset_imm);
816     // Armv6 is not supported so Neon can be used to avoid alignment issues.
817     CpuFeatureScope scope(this, NEON);
818     vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr));
819   } else if (type.value() == StoreType::kS128Store) {
820     Register actual_dst_addr = liftoff::CalculateActualAddress(
821         this, &temps, dst_addr, offset_reg, offset_imm);
822     // Armv6 is not supported so Neon can be used to avoid alignment issues.
823     CpuFeatureScope scope(this, NEON);
824     vst1(Neon8, NeonListOperand(src.low_fp(), 2),
825          NeonMemOperand(actual_dst_addr));
826   } else if (type.value() == StoreType::kF32Store) {
827     // TODO(arm): Use vst1 for f32 when implemented in simulator as used for
828     // f64. It supports unaligned access.
829     // CalculateActualAddress only avoids using a scratch register if the
830     // following condition holds; otherwise another register must be
831     // retrieved.
832     Register scratch = (offset_reg == no_reg && offset_imm == 0)
833                            ? temps.Acquire()
834                            : GetUnusedRegister(kGpReg, pinned).gp();
835     Register actual_dst_addr = liftoff::CalculateActualAddress(
836         this, &temps, dst_addr, offset_reg, offset_imm);
837     vmov(scratch, liftoff::GetFloatRegister(src.fp()));
838     str(scratch, MemOperand(actual_dst_addr));
839   } else {
840     MemOperand dst_op =
841         liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
842     if (protected_store_pc) *protected_store_pc = pc_offset();
843     switch (type.value()) {
844       case StoreType::kI64Store8:
845         src = src.low();
846         V8_FALLTHROUGH;
847       case StoreType::kI32Store8:
848         strb(src.gp(), dst_op);
849         break;
850       case StoreType::kI64Store16:
851         src = src.low();
852         V8_FALLTHROUGH;
853       case StoreType::kI32Store16:
854         strh(src.gp(), dst_op);
855         break;
856       case StoreType::kI64Store32:
857         src = src.low();
858         V8_FALLTHROUGH;
859       case StoreType::kI32Store:
860         str(src.gp(), dst_op);
861         break;
862       case StoreType::kI64Store:
863         str(src.low_gp(), dst_op);
864         // GetMemOp may use a scratch register as the offset register, in which
865         // case, calling GetMemOp again will fail due to the assembler having
866         // run out of scratch registers.
867         if (temps.CanAcquire()) {
868           dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg,
869                                      offset_imm + kSystemPointerSize);
870         } else {
871           add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize));
872         }
873         str(src.high_gp(), dst_op);
874         break;
875       default:
876         UNREACHABLE();
877     }
878   }
879 }
880 
881 namespace liftoff {
882 #define __ lasm->
883 
884 inline void AtomicOp32(
885     LiftoffAssembler* lasm, Register dst_addr, Register offset_reg,
886     uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result,
887     LiftoffRegList pinned,
888     void (Assembler::*load)(Register, Register, Condition),
889     void (Assembler::*store)(Register, Register, Register, Condition),
890     void (*op)(LiftoffAssembler*, Register, Register, Register)) {
891   Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
892 
893   // Allocate an additional {temp} register to hold the result that should be
894   // stored to memory. Note that {temp} and {store_result} are not allowed to be
895   // the same register.
896   Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
897 
898   // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
899   DCHECK(result.gp() != value.gp() && result.gp() != dst_addr &&
900          result.gp() != offset_reg);
901 
902   UseScratchRegisterScope temps(lasm);
903   Register actual_addr = liftoff::CalculateActualAddress(
904       lasm, &temps, dst_addr, offset_reg, offset_imm);
905 
906   __ dmb(ISH);
907   Label retry;
908   __ bind(&retry);
909   (lasm->*load)(result.gp(), actual_addr, al);
910   op(lasm, temp, result.gp(), value.gp());
911   (lasm->*store)(store_result, temp, actual_addr, al);
912   __ cmp(store_result, Operand(0));
913   __ b(ne, &retry);
914   __ dmb(ISH);
915 }
916 
917 inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs,
918                 Register rhs) {
919   __ add(dst, lhs, rhs);
920 }
921 
922 inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs,
923                 Register rhs) {
924   __ sub(dst, lhs, rhs);
925 }
926 
927 inline void And(LiftoffAssembler* lasm, Register dst, Register lhs,
928                 Register rhs) {
929   __ and_(dst, lhs, rhs);
930 }
931 
932 inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs,
933                Register rhs) {
934   __ orr(dst, lhs, rhs);
935 }
936 
937 inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs,
938                 Register rhs) {
939   __ eor(dst, lhs, rhs);
940 }
941 
942 inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs,
943                      Register rhs) {
944   __ mov(dst, rhs);
945 }
946 
947 inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr,
948                           Register offset_reg, uint32_t offset_imm,
949                           LiftoffRegister value, LiftoffRegister result,
950                           StoreType type,
951                           void (*op)(LiftoffAssembler*, Register, Register,
952                                      Register)) {
953   LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
954   switch (type.value()) {
955     case StoreType::kI64Store8:
956       __ LoadConstant(result.high(), WasmValue(0));
957       result = result.low();
958       value = value.low();
959       V8_FALLTHROUGH;
960     case StoreType::kI32Store8:
961       liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
962                           pinned, &Assembler::ldrexb, &Assembler::strexb, op);
963       return;
964     case StoreType::kI64Store16:
965       __ LoadConstant(result.high(), WasmValue(0));
966       result = result.low();
967       value = value.low();
968       V8_FALLTHROUGH;
969     case StoreType::kI32Store16:
970       liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
971                           pinned, &Assembler::ldrexh, &Assembler::strexh, op);
972       return;
973     case StoreType::kI64Store32:
974       __ LoadConstant(result.high(), WasmValue(0));
975       result = result.low();
976       value = value.low();
977       V8_FALLTHROUGH;
978     case StoreType::kI32Store:
979       liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
980                           pinned, &Assembler::ldrex, &Assembler::strex, op);
981       return;
982     default:
983       UNREACHABLE();
984   }
985 }
986 
987 inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr,
988                        Register offset_reg, uint32_t offset_imm,
989                        LiftoffRegister value,
990                        base::Optional<LiftoffRegister> result,
991                        void (*op)(LiftoffAssembler*, LiftoffRegister,
992                                   LiftoffRegister, LiftoffRegister)) {
993   // ldrexd/strexd operate on a 64 bit word held in two registers. The first
994   // register needs to have an even index, e.g. r8; the second needs to be the one
995   // with the next higher index, e.g. r9 if the first register is r8. In the
996   // following code we use the fixed register pair r8/r9 to make the code here
997   // simpler, even though other register pairs would also be possible.
998   constexpr Register dst_low = r8;
999   constexpr Register dst_high = r9;
1000 
1001   // Make sure {dst_low} and {dst_high} are not occupied by any other value.
1002   Register value_low = value.low_gp();
1003   Register value_high = value.high_gp();
1004   LiftoffRegList pinned = {dst_addr,   offset_reg, value_low,
1005                            value_high, dst_low,    dst_high};
1006   __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high},
1007                    pinned);
1008   pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};
1009   __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high},
1010                    pinned);
1011   pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};
1012 
1013   // Make sure that {result}, if it exists, also does not overlap with
1014   // {dst_low} and {dst_high}. We don't have to transfer the value stored in
1015   // {result}.
1016   Register result_low = no_reg;
1017   Register result_high = no_reg;
1018   if (result.has_value()) {
1019     result_low = result.value().low_gp();
1020     if (pinned.has(result_low)) {
1021       result_low = __ GetUnusedRegister(kGpReg, pinned).gp();
1022     }
1023     pinned.set(result_low);
1024 
1025     result_high = result.value().high_gp();
1026     if (pinned.has(result_high)) {
1027       result_high = __ GetUnusedRegister(kGpReg, pinned).gp();
1028     }
1029     pinned.set(result_high);
1030   }
1031 
1032   Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();
1033 
1034   UseScratchRegisterScope temps(lasm);
1035   Register actual_addr = liftoff::CalculateActualAddress(
1036       lasm, &temps, dst_addr, offset_reg, offset_imm);
1037 
1038   __ dmb(ISH);
1039   Label retry;
1040   __ bind(&retry);
1041   // {ldrexd} is needed here so that the {strexd} instruction below can
1042   // succeed. We don't need the value we are reading. We use {dst_low} and
1043   // {dst_high} as the destination registers because {ldrexd} has the same
1044   // restrictions on registers as {strexd}, see the comment above.
1045   __ ldrexd(dst_low, dst_high, actual_addr);
1046   if (result.has_value()) {
1047     __ mov(result_low, dst_low);
1048     __ mov(result_high, dst_high);
1049   }
1050   op(lasm, LiftoffRegister::ForPair(dst_low, dst_high),
1051      LiftoffRegister::ForPair(dst_low, dst_high),
1052      LiftoffRegister::ForPair(value_low, value_high));
1053   __ strexd(store_result, dst_low, dst_high, actual_addr);
1054   __ cmp(store_result, Operand(0));
1055   __ b(ne, &retry);
1056   __ dmb(ISH);
1057 
1058   if (result.has_value()) {
1059     if (result_low != result.value().low_gp()) {
1060       __ mov(result.value().low_gp(), result_low);
1061     }
1062     if (result_high != result.value().high_gp()) {
1063       __ mov(result.value().high_gp(), result_high);
1064     }
1065   }
1066 }
1067 
1068 inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst,
1069                      LiftoffRegister, LiftoffRegister src) {
1070   __ mov(dst.low_gp(), src.low_gp());
1071   __ mov(dst.high_gp(), src.high_gp());
1072 }
1073 
1074 #undef __
1075 }  // namespace liftoff
1076 
1077 void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
1078                                   Register offset_reg, uint32_t offset_imm,
1079                                   LoadType type, LiftoffRegList pinned) {
1080   if (type.value() != LoadType::kI64Load) {
1081     Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
1082     dmb(ISH);
1083     return;
1084   }
1085   // ldrexd loads a 64 bit word into two registers. The first register needs to
1086   // have an even index, e.g. r8, the second register needs to be the one with
1087   // the next higher index, e.g. r9 if the first register is r8. In the
1088   // following code we use the fixed register pair r8/r9 to make the code here
1089   // simpler, even though other register pairs would also be possible.
1090   constexpr Register dst_low = r8;
1091   constexpr Register dst_high = r9;
1092   SpillRegisters(dst_low, dst_high);
1093   {
1094     UseScratchRegisterScope temps(this);
1095     Register actual_addr = liftoff::CalculateActualAddress(
1096         this, &temps, src_addr, offset_reg, offset_imm);
1097     ldrexd(dst_low, dst_high, actual_addr);
1098     dmb(ISH);
1099   }
1100 
1101   ParallelRegisterMove(
1102       {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kI64}});
1103 }
1104 
1105 void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
1106                                    uint32_t offset_imm, LiftoffRegister src,
1107                                    StoreType type, LiftoffRegList pinned) {
1108   if (type.value() == StoreType::kI64Store) {
1109     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {},
1110                         liftoff::I64Store);
1111     return;
1112   }
1113 
1114   dmb(ISH);
1115   Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true);
1116   dmb(ISH);
1117   return;
1118 }
1119 
1120 void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
1121                                  uint32_t offset_imm, LiftoffRegister value,
1122                                  LiftoffRegister result, StoreType type) {
1123   if (type.value() == StoreType::kI64Store) {
1124     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1125                         liftoff::I64Binop<&Assembler::add, &Assembler::adc>);
1126     return;
1127   }
1128   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1129                          type, &liftoff::Add);
1130 }
1131 
1132 void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
1133                                  uint32_t offset_imm, LiftoffRegister value,
1134                                  LiftoffRegister result, StoreType type) {
1135   if (type.value() == StoreType::kI64Store) {
1136     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1137                         liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>);
1138     return;
1139   }
1140   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1141                          type, &liftoff::Sub);
1142 }
1143 
1144 void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
1145                                  uint32_t offset_imm, LiftoffRegister value,
1146                                  LiftoffRegister result, StoreType type) {
1147   if (type.value() == StoreType::kI64Store) {
1148     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1149                         liftoff::I64Binop<&Assembler::and_, &Assembler::and_>);
1150     return;
1151   }
1152   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1153                          type, &liftoff::And);
1154 }
1155 
1156 void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
1157                                 uint32_t offset_imm, LiftoffRegister value,
1158                                 LiftoffRegister result, StoreType type) {
1159   if (type.value() == StoreType::kI64Store) {
1160     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1161                         liftoff::I64Binop<&Assembler::orr, &Assembler::orr>);
1162     return;
1163   }
1164   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1165                          type, &liftoff::Or);
1166 }
1167 
1168 void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
1169                                  uint32_t offset_imm, LiftoffRegister value,
1170                                  LiftoffRegister result, StoreType type) {
1171   if (type.value() == StoreType::kI64Store) {
1172     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1173                         liftoff::I64Binop<&Assembler::eor, &Assembler::eor>);
1174     return;
1175   }
1176   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1177                          type, &liftoff::Xor);
1178 }
1179 
1180 void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
1181                                       uint32_t offset_imm,
1182                                       LiftoffRegister value,
1183                                       LiftoffRegister result, StoreType type) {
1184   if (type.value() == StoreType::kI64Store) {
1185     liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
1186                         liftoff::I64Store);
1187     return;
1188   }
1189   liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
1190                          type, &liftoff::Exchange);
1191 }
1192 
1193 namespace liftoff {
1194 #define __ lasm->
1195 
1196 inline void AtomicI64CompareExchange(LiftoffAssembler* lasm,
1197                                      Register dst_addr_reg, Register offset_reg,
1198                                      uint32_t offset_imm,
1199                                      LiftoffRegister expected,
1200                                      LiftoffRegister new_value,
1201                                      LiftoffRegister result) {
1202   // To implement I64AtomicCompareExchange, we need nearly all registers, with
1203   // some registers having special constraints, e.g. for {new_value} and
1204   // {result} the low-word register has to have an even register code, and the
1205   // high-word has to be in the next higher register. To avoid complicated
1206   // register allocation code here, we just assign fixed registers to all
1207   // values here, and then move all values into the correct register.
1208   Register dst_addr = r0;
1209   Register offset = r1;
1210   Register result_low = r4;
1211   Register result_high = r5;
1212   Register new_value_low = r2;
1213   Register new_value_high = r3;
1214   Register store_result = r6;
1215   Register expected_low = r8;
1216   Register expected_high = r9;
1217 
1218   // We spill all registers, so that we can re-assign them afterwards.
1219   __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low,
1220                     new_value_high, store_result, expected_low, expected_high);
1221 
1222   __ ParallelRegisterMove(
1223       {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value,
1224         kI64},
1225        {LiftoffRegister::ForPair(expected_low, expected_high), expected, kI64},
1226        {dst_addr, dst_addr_reg, kI32},
1227        {offset, offset_reg != no_reg ? offset_reg : offset, kI32}});
1228 
1229   {
1230     UseScratchRegisterScope temps(lasm);
1231     Register temp = liftoff::CalculateActualAddress(
1232         lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset,
1233         offset_imm, dst_addr);
1234     // Make sure the actual address is stored in the right register.
1235     DCHECK_EQ(dst_addr, temp);
1236     USE(temp);
1237   }
1238 
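  // Load-linked/store-conditional retry loop: {ldrexd} reads the current
  // 64-bit value, and {strexd} only succeeds (writes 0 into {store_result})
  // if no other store to the address happened in between; otherwise retry.
  // The {dmb ISH} barriers before and after provide the required ordering.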
1239   Label retry;
1240   Label done;
1241   __ dmb(ISH);
1242   __ bind(&retry);
1243   __ ldrexd(result_low, result_high, dst_addr);
1244   __ cmp(result_low, expected_low);
1245   __ b(ne, &done);
1246   __ cmp(result_high, expected_high);
1247   __ b(ne, &done);
1248   __ strexd(store_result, new_value_low, new_value_high, dst_addr);
1249   __ cmp(store_result, Operand(0));
1250   __ b(ne, &retry);
1251   __ dmb(ISH);
1252   __ bind(&done);
1253 
1254   __ ParallelRegisterMove(
1255       {{result, LiftoffRegister::ForPair(result_low, result_high), kI64}});
1256 }
1257 #undef __
1258 }  // namespace liftoff
1259 
1260 void LiftoffAssembler::AtomicCompareExchange(
1261     Register dst_addr, Register offset_reg, uint32_t offset_imm,
1262     LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
1263     StoreType type) {
1264   if (type.value() == StoreType::kI64Store) {
1265     liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm,
1266                                       expected, new_value, result);
1267     return;
1268   }
1269 
1270   // The other versions of CompareExchange can share code, but need special load
1271   // and store instructions.
1272   void (Assembler::*load)(Register, Register, Condition) = nullptr;
1273   void (Assembler::*store)(Register, Register, Register, Condition) = nullptr;
1274 
1275   LiftoffRegList pinned = {dst_addr, offset_reg};
1276   // We need to remember the high word of {result}, so we can set it to zero in
1277   // the end if necessary.
1278   Register result_high = no_reg;
1279   switch (type.value()) {
1280     case StoreType::kI64Store8:
1281       result_high = result.high_gp();
1282       result = result.low();
1283       new_value = new_value.low();
1284       expected = expected.low();
1285       V8_FALLTHROUGH;
1286     case StoreType::kI32Store8:
1287       load = &Assembler::ldrexb;
1288       store = &Assembler::strexb;
1289       // We have to clear the high bits of {expected}, as we can only do a
1290       // 32-bit comparison. If the {expected} register is used, we spill it
1291       // first.
1292       if (cache_state()->is_used(expected)) {
1293         SpillRegister(expected);
1294       }
1295       uxtb(expected.gp(), expected.gp());
1296       break;
1297     case StoreType::kI64Store16:
1298       result_high = result.high_gp();
1299       result = result.low();
1300       new_value = new_value.low();
1301       expected = expected.low();
1302       V8_FALLTHROUGH;
1303     case StoreType::kI32Store16:
1304       load = &Assembler::ldrexh;
1305       store = &Assembler::strexh;
1306       // We have to clear the high bits of {expected}, as we can only do a
1307       // 32-bit comparison. If the {expected} register is used, we spill it
1308       // first.
1309       if (cache_state()->is_used(expected)) {
1310         SpillRegister(expected);
1311       }
1312       uxth(expected.gp(), expected.gp());
1313       break;
1314     case StoreType::kI64Store32:
1315       result_high = result.high_gp();
1316       result = result.low();
1317       new_value = new_value.low();
1318       expected = expected.low();
1319       V8_FALLTHROUGH;
1320     case StoreType::kI32Store:
1321       load = &Assembler::ldrex;
1322       store = &Assembler::strex;
1323       break;
1324     default:
1325       UNREACHABLE();
1326   }
1327   pinned.set(new_value);
1328   pinned.set(expected);
1329 
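  // If {result} aliases one of the pinned registers, run the retry loop on a
  // temporary {result_reg} instead and copy it into {result} at the end.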
1330   Register result_reg = result.gp();
1331   if (pinned.has(result)) {
1332     result_reg = GetUnusedRegister(kGpReg, pinned).gp();
1333   }
1334   pinned.set(LiftoffRegister(result_reg));
1335   Register store_result = GetUnusedRegister(kGpReg, pinned).gp();
1336 
1337   UseScratchRegisterScope temps(this);
1338   Register actual_addr = liftoff::CalculateActualAddress(
1339       this, &temps, dst_addr, offset_reg, offset_imm);
1340 
1341   Label retry;
1342   Label done;
1343   dmb(ISH);
1344   bind(&retry);
1345   (this->*load)(result_reg, actual_addr, al);
1346   cmp(result_reg, expected.gp());
1347   b(ne, &done);
1348   (this->*store)(store_result, new_value.gp(), actual_addr, al);
1349   cmp(store_result, Operand(0));
1350   b(ne, &retry);
1351   dmb(ISH);
1352   bind(&done);
1353 
1354   if (result.gp() != result_reg) {
1355     mov(result.gp(), result_reg);
1356   }
1357   if (result_high != no_reg) {
1358     LoadConstant(LiftoffRegister(result_high), WasmValue(0));
1359   }
1360 }
1361 
1362 void LiftoffAssembler::AtomicFence() { dmb(ISH); }
1363 
1364 void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
1365                                            uint32_t caller_slot_idx,
1366                                            ValueKind kind) {
1367   MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize);
1368   liftoff::Load(this, dst, src, kind);
1369 }
1370 
1371 void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
1372                                             uint32_t caller_slot_idx,
1373                                             ValueKind kind) {
1374   MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize);
1375   liftoff::Store(this, src, dst, kind);
1376 }
1377 
1378 void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
1379                                            ValueKind kind) {
1380   MemOperand src(sp, offset);
1381   liftoff::Load(this, dst, src, kind);
1382 }
1383 
1384 void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
1385                                       ValueKind kind) {
1386   DCHECK_NE(dst_offset, src_offset);
1387   LiftoffRegister reg = GetUnusedRegister(reg_class_for(kind), {});
1388   Fill(reg, src_offset, kind);
1389   Spill(dst_offset, reg, kind);
1390 }
1391 
1392 void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
1393   DCHECK_NE(dst, src);
1394   DCHECK(kind == kI32 || is_reference(kind));
1395   TurboAssembler::Move(dst, src);
1396 }
1397 
1398 void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
1399                             ValueKind kind) {
1400   DCHECK_NE(dst, src);
1401   if (kind == kF32) {
1402     vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1403   } else if (kind == kF64) {
1404     vmov(dst, src);
1405   } else {
1406     DCHECK_EQ(kS128, kind);
1407     vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
1408   }
1409 }
1410 
1411 void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
1412   // The {str} instruction needs a temp register when the immediate in the
1413   // provided MemOperand does not fit into 12 bits. This happens for large stack
1414   // frames. This DCHECK checks that the temp register is available when needed.
1415   DCHECK(UseScratchRegisterScope{this}.CanAcquire());
1416   DCHECK_LT(0, offset);
1417   RecordUsedSpillOffset(offset);
1418   MemOperand dst(fp, -offset);
1419   liftoff::Store(this, reg, dst, kind);
1420 }
1421 
1422 void LiftoffAssembler::Spill(int offset, WasmValue value) {
1423   RecordUsedSpillOffset(offset);
1424   MemOperand dst = liftoff::GetStackSlot(offset);
1425   UseScratchRegisterScope temps(this);
1426   Register src = no_reg;
1427   // The scratch register will be required by str if multiple instructions
1428   // are required to encode the offset, and so we cannot use it in that case.
1429   if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
1430     src = GetUnusedRegister(kGpReg, {}).gp();
1431   } else {
1432     src = temps.Acquire();
1433   }
1434   switch (value.type().kind()) {
1435     case kI32:
1436       mov(src, Operand(value.to_i32()));
1437       str(src, dst);
1438       break;
1439     case kI64: {
1440       int32_t low_word = value.to_i64();
1441       mov(src, Operand(low_word));
1442       str(src, liftoff::GetHalfStackSlot(offset, kLowWord));
1443       int32_t high_word = value.to_i64() >> 32;
1444       mov(src, Operand(high_word));
1445       str(src, liftoff::GetHalfStackSlot(offset, kHighWord));
1446       break;
1447     }
1448     default:
1449       // We do not track f32 and f64 constants, hence they are unreachable.
1450       UNREACHABLE();
1451   }
1452 }
1453 
1454 void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
1455   liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
1456 }
1457 
1458 void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
1459   ldr(reg, liftoff::GetHalfStackSlot(offset, half));
1460 }
1461 
1462 void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
1463   DCHECK_LT(0, size);
1464   DCHECK_EQ(0, size % 4);
1465   RecordUsedSpillOffset(start + size);
1466 
1467   // We need a zero reg. Always use r0 for that, and push it before to restore
1468   // its value afterwards.
1469   push(r0);
1470   mov(r0, Operand(0));
1471 
1472   if (size <= 36) {
1473     // Special straight-line code for up to 9 words. Generates one
1474     // instruction per word.
1475     for (int offset = 4; offset <= size; offset += 4) {
1476       str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord));
1477     }
1478   } else {
1479     // General case for bigger counts (9 instructions).
1480     // Use r1 for start address (inclusive), r2 for end address (exclusive).
1481     push(r1);
1482     push(r2);
1483     sub(r1, fp, Operand(start + size));
1484     sub(r2, fp, Operand(start));
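    // Stack slots live at negative offsets from fp, so {fp - (start + size)}
    // is the lowest address to clear (inclusive) and {fp - start} the upper
    // bound (exclusive).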
1485 
1486     Label loop;
1487     bind(&loop);
1488     str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex));
1489     cmp(r1, r2);
1490     b(&loop, ne);
1491 
1492     pop(r2);
1493     pop(r1);
1494   }
1495 
1496   pop(r0);
1497 }
1498 
1499 #define I32_BINOP(name, instruction)                             \
1500   void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
1501                                      Register rhs) {             \
1502     instruction(dst, lhs, rhs);                                  \
1503   }
1504 #define I32_BINOP_I(name, instruction)                              \
1505   I32_BINOP(name, instruction)                                      \
1506   void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
1507                                         int32_t imm) {              \
1508     instruction(dst, lhs, Operand(imm));                            \
1509   }
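// Wasm defines shift amounts modulo 32. ARM register-specified shifts use the
// low byte of the shift register (so e.g. a shift by 32 would not be a no-op),
// hence the register variant below masks the amount with 0x1f explicitly.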
1510 #define I32_SHIFTOP(name, instruction)                              \
1511   void LiftoffAssembler::emit_##name(Register dst, Register src,    \
1512                                      Register amount) {             \
1513     UseScratchRegisterScope temps(this);                            \
1514     Register scratch = temps.Acquire();                             \
1515     and_(scratch, amount, Operand(0x1f));                           \
1516     instruction(dst, src, Operand(scratch));                        \
1517   }                                                                 \
1518   void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
1519                                         int32_t amount) {           \
1520     if (V8_LIKELY((amount & 31) != 0)) {                            \
1521       instruction(dst, src, Operand(amount & 31));                  \
1522     } else if (dst != src) {                                        \
1523       mov(dst, src);                                                \
1524     }                                                               \
1525   }
1526 #define FP32_UNOP(name, instruction)                                           \
1527   void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1528     instruction(liftoff::GetFloatRegister(dst),                                \
1529                 liftoff::GetFloatRegister(src));                               \
1530   }
1531 #define FP32_BINOP(name, instruction)                                        \
1532   void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1533                                      DoubleRegister rhs) {                   \
1534     instruction(liftoff::GetFloatRegister(dst),                              \
1535                 liftoff::GetFloatRegister(lhs),                              \
1536                 liftoff::GetFloatRegister(rhs));                             \
1537   }
1538 #define FP64_UNOP(name, instruction)                                           \
1539   void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1540     instruction(dst, src);                                                     \
1541   }
1542 #define FP64_BINOP(name, instruction)                                        \
1543   void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1544                                      DoubleRegister rhs) {                   \
1545     instruction(dst, lhs, rhs);                                              \
1546   }
1547 
1548 I32_BINOP_I(i32_add, add)
1549 I32_BINOP_I(i32_sub, sub)
1550 I32_BINOP(i32_mul, mul)
1551 I32_BINOP_I(i32_and, and_)
1552 I32_BINOP_I(i32_or, orr)
1553 I32_BINOP_I(i32_xor, eor)
1554 I32_SHIFTOP(i32_shl, lsl)
1555 I32_SHIFTOP(i32_sar, asr)
1556 I32_SHIFTOP(i32_shr, lsr)
1557 FP32_BINOP(f32_add, vadd)
1558 FP32_BINOP(f32_sub, vsub)
1559 FP32_BINOP(f32_mul, vmul)
1560 FP32_BINOP(f32_div, vdiv)
1561 FP32_UNOP(f32_abs, vabs)
1562 FP32_UNOP(f32_neg, vneg)
1563 FP32_UNOP(f32_sqrt, vsqrt)
1564 FP64_BINOP(f64_add, vadd)
1565 FP64_BINOP(f64_sub, vsub)
1566 FP64_BINOP(f64_mul, vmul)
1567 FP64_BINOP(f64_div, vdiv)
1568 FP64_UNOP(f64_abs, vabs)
1569 FP64_UNOP(f64_neg, vneg)
1570 FP64_UNOP(f64_sqrt, vsqrt)
1571 
1572 #undef I32_BINOP
1573 #undef I32_SHIFTOP
1574 #undef FP32_UNOP
1575 #undef FP32_BINOP
1576 #undef FP64_UNOP
1577 #undef FP64_BINOP
1578 
1579 void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1580   clz(dst, src);
1581 }
1582 
1583 void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1584   rbit(dst, src);
1585   clz(dst, dst);
1586 }
1587 
1588 namespace liftoff {
1589 inline void GeneratePopCnt(Assembler* assm, Register dst, Register src,
1590                            Register scratch1, Register scratch2) {
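  // Branch-free "SWAR" popcount: bit counts are accumulated pairwise (bits,
  // 2-bit fields, nibbles, bytes). The final count is at most 32 and fits in
  // the low 6 bits, hence the 0x3f mask at the end.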
1591   DCHECK(!AreAliased(dst, scratch1, scratch2));
1592   if (src == scratch1) std::swap(scratch1, scratch2);
1593   // x = x - ((x & (0x55555555 << 1)) >> 1)
1594   assm->and_(scratch1, src, Operand(0xaaaaaaaa));
1595   assm->sub(dst, src, Operand(scratch1, LSR, 1));
1596   // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
1597   assm->mov(scratch1, Operand(0x33333333));
1598   assm->and_(scratch2, dst, Operand(scratch1, LSL, 2));
1599   assm->and_(scratch1, dst, scratch1);
1600   assm->add(dst, scratch1, Operand(scratch2, LSR, 2));
1601   // x = (x + (x >> 4)) & 0x0F0F0F0F
1602   assm->add(dst, dst, Operand(dst, LSR, 4));
1603   assm->and_(dst, dst, Operand(0x0f0f0f0f));
1604   // x = x + (x >> 8)
1605   assm->add(dst, dst, Operand(dst, LSR, 8));
1606   // x = x + (x >> 16)
1607   assm->add(dst, dst, Operand(dst, LSR, 16));
1608   // x = x & 0x3F
1609   assm->and_(dst, dst, Operand(0x3f));
1610 }
1611 }  // namespace liftoff
1612 
1613 bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1614   LiftoffRegList pinned = {dst};
1615   Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1616   Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1617   liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2);
1618   return true;
1619 }
1620 
1621 void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1622                                      Label* trap_div_by_zero,
1623                                      Label* trap_div_unrepresentable) {
1624   if (!CpuFeatures::IsSupported(SUDIV)) {
1625     bailout(kMissingCPUFeature, "i32_divs");
1626     return;
1627   }
1628   CpuFeatureScope scope(this, SUDIV);
1629   // Issue division early so we can perform the trapping checks whilst it
1630   // completes.
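  // This is safe even for inputs that will trap below: ARM sdiv does not
  // fault; division by zero yields 0 and kMinInt / -1 yields kMinInt.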
1631   bool speculative_sdiv = dst != lhs && dst != rhs;
1632   if (speculative_sdiv) {
1633     sdiv(dst, lhs, rhs);
1634   }
1635   Label noTrap;
1636   // Check for division by zero.
1637   cmp(rhs, Operand(0));
1638   b(trap_div_by_zero, eq);
1639   // Check for kMinInt / -1. This is unrepresentable.
1640   cmp(rhs, Operand(-1));
1641   b(&noTrap, ne);
1642   cmp(lhs, Operand(kMinInt));
1643   b(trap_div_unrepresentable, eq);
1644   bind(&noTrap);
1645   if (!speculative_sdiv) {
1646     sdiv(dst, lhs, rhs);
1647   }
1648 }
1649 
1650 void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1651                                      Label* trap_div_by_zero) {
1652   if (!CpuFeatures::IsSupported(SUDIV)) {
1653     bailout(kMissingCPUFeature, "i32_divu");
1654     return;
1655   }
1656   CpuFeatureScope scope(this, SUDIV);
1657   // Check for division by zero.
1658   cmp(rhs, Operand(0));
1659   b(trap_div_by_zero, eq);
1660   udiv(dst, lhs, rhs);
1661 }
1662 
1663 void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1664                                      Label* trap_div_by_zero) {
1665   if (!CpuFeatures::IsSupported(SUDIV)) {
1666     // If this case is ever handled, a check for ARMv7 will be required in
1667     // order to use mls. Mls support is implied by SUDIV support.
1668     bailout(kMissingCPUFeature, "i32_rems");
1669     return;
1670   }
1671   CpuFeatureScope scope(this, SUDIV);
1672   // No need to check kMinInt / -1 because the result is kMinInt and then
1673   // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
1674   UseScratchRegisterScope temps(this);
1675   Register scratch = temps.Acquire();
1676   sdiv(scratch, lhs, rhs);
1677   // Check for division by zero.
1678   cmp(rhs, Operand(0));
1679   b(trap_div_by_zero, eq);
1680   // Compute remainder.
1681   mls(dst, scratch, rhs, lhs);
1682 }
1683 
1684 void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1685                                      Label* trap_div_by_zero) {
1686   if (!CpuFeatures::IsSupported(SUDIV)) {
1687     // If this case is ever handled, a check for ARMv7 will be required in
1688     // order to use mls. Mls support is implied by SUDIV support.
1689     bailout(kMissingCPUFeature, "i32_remu");
1690     return;
1691   }
1692   CpuFeatureScope scope(this, SUDIV);
1693   // Unsigned division cannot hit the kMinInt / -1 overflow case, so only the
1694   // division-by-zero check below is needed.
1695   UseScratchRegisterScope temps(this);
1696   Register scratch = temps.Acquire();
1697   udiv(scratch, lhs, rhs);
1698   // Check for division by zero.
1699   cmp(rhs, Operand(0));
1700   b(trap_div_by_zero, eq);
1701   // Compute remainder.
1702   mls(dst, scratch, rhs, lhs);
1703 }
1704 
1705 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1706                                     LiftoffRegister rhs) {
1707   liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1708 }
1709 
1710 void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1711                                      int64_t imm) {
1712   liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1713 }
1714 
1715 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1716                                     LiftoffRegister rhs) {
1717   liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs);
1718 }
1719 
1720 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1721                                     LiftoffRegister rhs) {
1722   // Idea:
1723   //        [           lhs_hi  |           lhs_lo  ] * [  rhs_hi  |  rhs_lo  ]
1724   //    =   [  lhs_hi * rhs_lo  |                   ]  (32 bit mul, shift 32)
1725   //      + [  lhs_lo * rhs_hi  |                   ]  (32 bit mul, shift 32)
1726   //      + [             lhs_lo * rhs_lo           ]  (32x32->64 mul, shift 0)
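  //   (The lhs_hi * rhs_hi term only contributes to bits 64 and above, so it
  //   is dropped.)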
1727   UseScratchRegisterScope temps(this);
1728   Register scratch = temps.Acquire();
1729   // scratch = lhs_hi * rhs_lo
1730   mul(scratch, lhs.high_gp(), rhs.low_gp());
1731   // scratch += lhs_lo * rhs_hi
1732   mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch);
1733   // TODO(arm): use umlal once implemented correctly in the simulator.
1734   // [dst_hi|dst_lo] = lhs_lo * rhs_lo
1735   umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp());
1736   // dst_hi += scratch
1737   add(dst.high_gp(), dst.high_gp(), scratch);
1738 }
1739 
1740 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1741                                      LiftoffRegister rhs,
1742                                      Label* trap_div_by_zero,
1743                                      Label* trap_div_unrepresentable) {
1744   return false;
1745 }
1746 
1747 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1748                                      LiftoffRegister rhs,
1749                                      Label* trap_div_by_zero) {
1750   return false;
1751 }
1752 
1753 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1754                                      LiftoffRegister rhs,
1755                                      Label* trap_div_by_zero) {
1756   return false;
1757 }
1758 
1759 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1760                                      LiftoffRegister rhs,
1761                                      Label* trap_div_by_zero) {
1762   return false;
1763 }
1764 
1765 void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1766                                     Register amount) {
1767   liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount);
1768 }
1769 
1770 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1771                                      int32_t amount) {
1772   UseScratchRegisterScope temps(this);
1773   // {src.low_gp()} will still be needed after writing {dst.high_gp()}.
1774   Register src_low =
1775       liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps);
1776 
1777   LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63);
1778 }
1779 
1780 void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1781                                     Register amount) {
1782   liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount);
1783 }
1784 
1785 void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1786                                      int32_t amount) {
1787   UseScratchRegisterScope temps(this);
1788   // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1789   Register src_high =
1790       liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1791 
1792   AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1793 }
1794 
1795 void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1796                                     Register amount) {
1797   liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount);
1798 }
1799 
1800 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1801                                      int32_t amount) {
1802   UseScratchRegisterScope temps(this);
1803   // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1804   Register src_high =
1805       liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1806 
1807   LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1808 }
1809 
1810 void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1811   // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
1812   Label done;
1813   Label high_is_zero;
1814   cmp(src.high_gp(), Operand(0));
1815   b(&high_is_zero, eq);
1816 
1817   clz(dst.low_gp(), src.high_gp());
1818   jmp(&done);
1819 
1820   bind(&high_is_zero);
1821   clz(dst.low_gp(), src.low_gp());
1822   add(dst.low_gp(), dst.low_gp(), Operand(32));
1823 
1824   bind(&done);
1825   mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
1826 }
1827 
1828 void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1829   // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
1830   // CTZ32(x) = CLZ(RBIT(x))
1831   Label done;
1832   Label low_is_zero;
1833   cmp(src.low_gp(), Operand(0));
1834   b(&low_is_zero, eq);
1835 
1836   rbit(dst.low_gp(), src.low_gp());
1837   clz(dst.low_gp(), dst.low_gp());
1838   jmp(&done);
1839 
1840   bind(&low_is_zero);
1841   rbit(dst.low_gp(), src.high_gp());
1842   clz(dst.low_gp(), dst.low_gp());
1843   add(dst.low_gp(), dst.low_gp(), Operand(32));
1844 
1845   bind(&done);
1846   mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
1847 }
1848 
1849 bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1850                                        LiftoffRegister src) {
1851   // Produce partial popcnts in the two dst registers, making sure not to
1852   // overwrite the second src register before using it.
1853   Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
1854   Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
1855   LiftoffRegList pinned = {dst, src2};
1856   Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1857   Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1858   liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2);
1859   liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2);
1860   // Now add the two into the lower dst reg and clear the higher dst reg.
1861   add(dst.low_gp(), dst.low_gp(), dst.high_gp());
1862   mov(dst.high_gp(), Operand(0));
1863   return true;
1864 }
1865 
1866 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1867   UseScratchRegisterScope temps(this);
1868   Register scratch = temps.Acquire();
1869   ldr(scratch, MemOperand(dst.gp(), offset));
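  // {Smi::FromInt(1)} is the tagged representation of 1, so a plain add
  // increments the Smi payload without untagging it.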
1870   add(scratch, scratch, Operand(Smi::FromInt(1)));
1871   str(scratch, MemOperand(dst.gp(), offset));
1872 }
1873 
1874 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
1875   if (CpuFeatures::IsSupported(ARMv8)) {
1876     CpuFeatureScope scope(this, ARMv8);
1877     vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1878     return true;
1879   }
1880   return false;
1881 }
1882 
1883 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
1884   if (CpuFeatures::IsSupported(ARMv8)) {
1885     CpuFeatureScope scope(this, ARMv8);
1886     vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1887     return true;
1888   }
1889   return false;
1890 }
1891 
1892 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
1893   if (CpuFeatures::IsSupported(ARMv8)) {
1894     CpuFeatureScope scope(this, ARMv8);
1895     vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1896     return true;
1897   }
1898   return false;
1899 }
1900 
1901 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
1902                                             DoubleRegister src) {
1903   if (CpuFeatures::IsSupported(ARMv8)) {
1904     CpuFeatureScope scope(this, ARMv8);
1905     vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1906     return true;
1907   }
1908   return false;
1909 }
1910 
1911 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1912                                     DoubleRegister rhs) {
1913   liftoff::EmitFloatMinOrMax(
1914       this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1915       liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin);
1916 }
1917 
1918 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
1919                                     DoubleRegister rhs) {
1920   liftoff::EmitFloatMinOrMax(
1921       this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1922       liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax);
1923 }
1924 
1925 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
1926   if (CpuFeatures::IsSupported(ARMv8)) {
1927     CpuFeatureScope scope(this, ARMv8);
1928     vrintp(dst, src);
1929     return true;
1930   }
1931   return false;
1932 }
1933 
1934 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
1935   if (CpuFeatures::IsSupported(ARMv8)) {
1936     CpuFeatureScope scope(this, ARMv8);
1937     vrintm(dst, src);
1938     return true;
1939   }
1940   return false;
1941 }
1942 
1943 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
1944   if (CpuFeatures::IsSupported(ARMv8)) {
1945     CpuFeatureScope scope(this, ARMv8);
1946     vrintz(dst, src);
1947     return true;
1948   }
1949   return false;
1950 }
1951 
1952 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
1953                                             DoubleRegister src) {
1954   if (CpuFeatures::IsSupported(ARMv8)) {
1955     CpuFeatureScope scope(this, ARMv8);
1956     vrintn(dst, src);
1957     return true;
1958   }
1959   return false;
1960 }
1961 
1962 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
1963                                     DoubleRegister rhs) {
1964   liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin);
1965 }
1966 
1967 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
1968                                     DoubleRegister rhs) {
1969   liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax);
1970 }
1971 
1972 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1973                                          DoubleRegister rhs) {
1974   constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
1975   UseScratchRegisterScope temps(this);
1976   Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1977   Register scratch2 = temps.Acquire();
1978   VmovLow(scratch, lhs);
1979   // Clear sign bit in {scratch}.
1980   bic(scratch, scratch, Operand(kF32SignBit));
1981   VmovLow(scratch2, rhs);
1982   // Isolate sign bit in {scratch2}.
1983   and_(scratch2, scratch2, Operand(kF32SignBit));
1984   // Combine {scratch2} into {scratch}.
1985   orr(scratch, scratch, scratch2);
1986   VmovLow(dst, scratch);
1987 }
1988 
1989 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1990                                          DoubleRegister rhs) {
1991   constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31;
1992   // On arm, we cannot hold the whole f64 value in a gp register, so we just
1993   // operate on the upper half (UH).
1994   UseScratchRegisterScope temps(this);
1995   Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1996   Register scratch2 = temps.Acquire();
1997   VmovHigh(scratch, lhs);
1998   // Clear sign bit in {scratch}.
1999   bic(scratch, scratch, Operand(kF64SignBitHighWord));
2000   VmovHigh(scratch2, rhs);
2001   // Isolate sign bit in {scratch2}.
2002   and_(scratch2, scratch2, Operand(kF64SignBitHighWord));
2003   // Combine {scratch2} into {scratch}.
2004   orr(scratch, scratch, scratch2);
2005   vmov(dst, lhs);
2006   VmovHigh(dst, scratch);
2007 }
2008 
2009 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2010                                             LiftoffRegister dst,
2011                                             LiftoffRegister src, Label* trap) {
2012   switch (opcode) {
2013     case kExprI32ConvertI64:
2014       TurboAssembler::Move(dst.gp(), src.low_gp());
2015       return true;
2016     case kExprI32SConvertF32: {
2017       UseScratchRegisterScope temps(this);
2018       SwVfpRegister scratch_f = temps.AcquireS();
2019       vcvt_s32_f32(
2020           scratch_f,
2021           liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2022       vmov(dst.gp(), scratch_f);
2023       // Check underflow and NaN.
2024       vmov(scratch_f, Float32(static_cast<float>(INT32_MIN)));
2025       VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2026       b(trap, lt);
2027       // Check overflow.
2028       cmp(dst.gp(), Operand(-1));
2029       b(trap, vs);
2030       return true;
2031     }
2032     case kExprI32UConvertF32: {
2033       UseScratchRegisterScope temps(this);
2034       SwVfpRegister scratch_f = temps.AcquireS();
2035       vcvt_u32_f32(
2036           scratch_f,
2037           liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2038       vmov(dst.gp(), scratch_f);
2039       // Check underflow and NaN.
2040       vmov(scratch_f, Float32(-1.0f));
2041       VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2042       b(trap, le);
2043       // Check overflow.
2044       cmp(dst.gp(), Operand(-1));
2045       b(trap, eq);
2046       return true;
2047     }
2048     case kExprI32SConvertF64: {
2049       UseScratchRegisterScope temps(this);
2050       SwVfpRegister scratch_f = temps.AcquireS();
2051       vcvt_s32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2052       vmov(dst.gp(), scratch_f);
2053       // Check underflow and NaN.
2054       DwVfpRegister scratch_d = temps.AcquireD();
2055       vmov(scratch_d, base::Double(static_cast<double>(INT32_MIN - 1.0)));
2056       VFPCompareAndSetFlags(src.fp(), scratch_d);
2057       b(trap, le);
2058       // Check overflow.
2059       vmov(scratch_d, base::Double(static_cast<double>(INT32_MAX + 1.0)));
2060       VFPCompareAndSetFlags(src.fp(), scratch_d);
2061       b(trap, ge);
2062       return true;
2063     }
2064     case kExprI32UConvertF64: {
2065       UseScratchRegisterScope temps(this);
2066       SwVfpRegister scratch_f = temps.AcquireS();
2067       vcvt_u32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2068       vmov(dst.gp(), scratch_f);
2069       // Check underflow and NaN.
2070       DwVfpRegister scratch_d = temps.AcquireD();
2071       vmov(scratch_d, base::Double(static_cast<double>(-1.0)));
2072       VFPCompareAndSetFlags(src.fp(), scratch_d);
2073       b(trap, le);
2074       // Check overflow.
2075       vmov(scratch_d, base::Double(static_cast<double>(UINT32_MAX + 1.0)));
2076       VFPCompareAndSetFlags(src.fp(), scratch_d);
2077       b(trap, ge);
2078       return true;
2079     }
2080     case kExprI32SConvertSatF32: {
2081       UseScratchRegisterScope temps(this);
2082       SwVfpRegister scratch_f = temps.AcquireS();
2083       vcvt_s32_f32(
2084           scratch_f,
2085           liftoff::GetFloatRegister(src.fp()));  // f32 -> i32 round to zero.
2086       vmov(dst.gp(), scratch_f);
2087       return true;
2088     }
2089     case kExprI32UConvertSatF32: {
2090       UseScratchRegisterScope temps(this);
2091       SwVfpRegister scratch_f = temps.AcquireS();
2092       vcvt_u32_f32(
2093           scratch_f,
2094           liftoff::GetFloatRegister(src.fp()));  // f32 -> u32 round to zero.
2095       vmov(dst.gp(), scratch_f);
2096       return true;
2097     }
2098     case kExprI32SConvertSatF64: {
2099       UseScratchRegisterScope temps(this);
2100       SwVfpRegister scratch_f = temps.AcquireS();
2101       vcvt_s32_f64(scratch_f, src.fp());  // f64 -> i32 round to zero.
2102       vmov(dst.gp(), scratch_f);
2103       return true;
2104     }
2105     case kExprI32UConvertSatF64: {
2106       UseScratchRegisterScope temps(this);
2107       SwVfpRegister scratch_f = temps.AcquireS();
2108       vcvt_u32_f64(scratch_f, src.fp());  // f64 -> u32 round to zero.
2109       vmov(dst.gp(), scratch_f);
2110       return true;
2111     }
2112     case kExprI32ReinterpretF32:
2113       vmov(dst.gp(), liftoff::GetFloatRegister(src.fp()));
2114       return true;
2115     case kExprI64SConvertI32:
2116       if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2117       mov(dst.high_gp(), Operand(src.gp(), ASR, 31));
2118       return true;
2119     case kExprI64UConvertI32:
2120       if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2121       mov(dst.high_gp(), Operand(0));
2122       return true;
2123     case kExprI64ReinterpretF64:
2124       vmov(dst.low_gp(), dst.high_gp(), src.fp());
2125       return true;
2126     case kExprF32SConvertI32: {
2127       SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2128       vmov(dst_float, src.gp());
2129       vcvt_f32_s32(dst_float, dst_float);
2130       return true;
2131     }
2132     case kExprF32UConvertI32: {
2133       SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2134       vmov(dst_float, src.gp());
2135       vcvt_f32_u32(dst_float, dst_float);
2136       return true;
2137     }
2138     case kExprF32ConvertF64:
2139       vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp());
2140       return true;
2141     case kExprF32ReinterpretI32:
2142       vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2143       return true;
2144     case kExprF64SConvertI32: {
2145       vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2146       vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2147       return true;
2148     }
2149     case kExprF64UConvertI32: {
2150       vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2151       vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2152       return true;
2153     }
2154     case kExprF64ConvertF32:
2155       vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp()));
2156       return true;
2157     case kExprF64ReinterpretI64:
2158       vmov(dst.fp(), src.low_gp(), src.high_gp());
2159       return true;
2160     case kExprF64SConvertI64:
2161     case kExprF64UConvertI64:
2162     case kExprI64SConvertF32:
2163     case kExprI64UConvertF32:
2164     case kExprI64SConvertSatF32:
2165     case kExprI64UConvertSatF32:
2166     case kExprF32SConvertI64:
2167     case kExprF32UConvertI64:
2168     case kExprI64SConvertF64:
2169     case kExprI64UConvertF64:
2170     case kExprI64SConvertSatF64:
2171     case kExprI64UConvertSatF64:
2172       // These cases can be handled by the C fallback function.
2173       return false;
2174     default:
2175       UNREACHABLE();
2176   }
2177 }
2178 
2179 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2180   sxtb(dst, src);
2181 }
2182 
2183 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2184   sxth(dst, src);
2185 }
2186 
2187 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2188                                               LiftoffRegister src) {
2189   emit_i32_signextend_i8(dst.low_gp(), src.low_gp());
2190   mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2191 }
2192 
2193 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2194                                                LiftoffRegister src) {
2195   emit_i32_signextend_i16(dst.low_gp(), src.low_gp());
2196   mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2197 }
2198 
2199 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2200                                                LiftoffRegister src) {
2201   TurboAssembler::Move(dst.low_gp(), src.low_gp());
2202   mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31));
2203 }
2204 
2205 void LiftoffAssembler::emit_jump(Label* label) { b(label); }
2206 
2207 void LiftoffAssembler::emit_jump(Register target) { bx(target); }
2208 
2209 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2210                                       Label* label, ValueKind kind,
2211                                       Register lhs, Register rhs) {
2212   Condition cond = liftoff::ToCondition(liftoff_cond);
2213 
2214   if (rhs == no_reg) {
2215     DCHECK_EQ(kind, kI32);
2216     cmp(lhs, Operand(0));
2217   } else {
2218     DCHECK(kind == kI32 || (is_reference(kind) && (liftoff_cond == kEqual ||
2219                                                    liftoff_cond == kUnequal)));
2220     cmp(lhs, rhs);
2221   }
2222   b(label, cond);
2223 }
2224 
2225 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2226                                            Label* label, Register lhs,
2227                                            int32_t imm) {
2228   Condition cond = liftoff::ToCondition(liftoff_cond);
2229   cmp(lhs, Operand(imm));
2230   b(label, cond);
2231 }
2232 
2233 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2234                                                    int subtrahend,
2235                                                    Label* result_negative) {
2236   sub(value, value, Operand(subtrahend), SetCC);
2237   b(result_negative, mi);
2238 }
2239 
2240 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
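  // {clz} returns 32 iff {src} is zero; shifting right by kRegSizeInBitsLog2
  // (= 5) thus yields 1 for a zero input and 0 for any non-zero input.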
2241   clz(dst, src);
2242   mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2));
2243 }
2244 
2245 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2246                                          Register dst, Register lhs,
2247                                          Register rhs) {
2248   Condition cond = liftoff::ToCondition(liftoff_cond);
2249   cmp(lhs, rhs);
2250   mov(dst, Operand(0), LeaveCC);
2251   mov(dst, Operand(1), LeaveCC, cond);
2252 }
2253 
2254 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
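  // OR the two halves together; the combined word is zero iff the whole i64
  // is zero, then reuse the clz >> 5 trick from emit_i32_eqz.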
2255   orr(dst, src.low_gp(), src.high_gp());
2256   clz(dst, dst);
2257   mov(dst, Operand(dst, LSR, 5));
2258 }
2259 
2260 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2261                                          Register dst, LiftoffRegister lhs,
2262                                          LiftoffRegister rhs) {
2263   // For signed i64 comparisons, we still need to use unsigned comparison for
2264   // the low word (the only bit carrying signedness information is the MSB in
2265   // the high word).
2266   Condition cond = liftoff::ToCondition(liftoff_cond);
2267   Condition unsigned_cond =
2268       liftoff::ToCondition(liftoff::MakeUnsigned(liftoff_cond));
2269   Label set_cond;
2270   Label cont;
2271   LiftoffRegister dest = LiftoffRegister(dst);
2272   bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs);
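  // If {dst} does not alias any input, it can be cleared before the
  // comparison; otherwise it must only be cleared after the inputs have been
  // compared (see the {mov(dst, Operand(0))} calls below).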
2273   if (speculative_move) {
2274     mov(dst, Operand(0));
2275   }
2276   // Compare high word first. If it differs, use it for the set_cond. If it's
2277   // equal, compare the low word and use that for set_cond.
2278   cmp(lhs.high_gp(), rhs.high_gp());
2279   if (unsigned_cond == cond) {
2280     cmp(lhs.low_gp(), rhs.low_gp(), eq);
2281     if (!speculative_move) {
2282       mov(dst, Operand(0));
2283     }
2284     mov(dst, Operand(1), LeaveCC, cond);
2285   } else {
2286     // If the condition predicate for the low differs from that for the high
2287     // word, the conditional move instructions must be separated.
2288     b(ne, &set_cond);
2289     cmp(lhs.low_gp(), rhs.low_gp());
2290     if (!speculative_move) {
2291       mov(dst, Operand(0));
2292     }
2293     mov(dst, Operand(1), LeaveCC, unsigned_cond);
2294     b(&cont);
2295     bind(&set_cond);
2296     if (!speculative_move) {
2297       mov(dst, Operand(0));
2298     }
2299     mov(dst, Operand(1), LeaveCC, cond);
2300     bind(&cont);
2301   }
2302 }
2303 
2304 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2305                                          Register dst, DoubleRegister lhs,
2306                                          DoubleRegister rhs) {
2307   Condition cond = liftoff::ToCondition(liftoff_cond);
2308   VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs),
2309                         liftoff::GetFloatRegister(rhs));
2310   mov(dst, Operand(0), LeaveCC);
2311   mov(dst, Operand(1), LeaveCC, cond);
2312   if (cond != ne) {
2313     // If the V flag is set, at least one argument was a NaN -> false.
2314     mov(dst, Operand(0), LeaveCC, vs);
2315   }
2316 }
2317 
2318 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2319                                          Register dst, DoubleRegister lhs,
2320                                          DoubleRegister rhs) {
2321   Condition cond = liftoff::ToCondition(liftoff_cond);
2322   VFPCompareAndSetFlags(lhs, rhs);
2323   mov(dst, Operand(0), LeaveCC);
2324   mov(dst, Operand(1), LeaveCC, cond);
2325   if (cond != ne) {
2326     // If the V flag is set, at least one argument was a NaN -> false.
2327     mov(dst, Operand(0), LeaveCC, vs);
2328   }
2329 }
2330 
2331 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2332                                    LiftoffRegister true_value,
2333                                    LiftoffRegister false_value,
2334                                    ValueKind kind) {
2335   return false;
2336 }
2337 
2338 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2339                                       SmiCheckMode mode) {
2340   tst(obj, Operand(kSmiTagMask));
2341   Condition condition = mode == kJumpOnSmi ? eq : ne;
2342   b(condition, target);
2343 }
2344 
2345 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2346                                      Register offset_reg, uintptr_t offset_imm,
2347                                      LoadType type,
2348                                      LoadTransformationKind transform,
2349                                      uint32_t* protected_load_pc) {
2350   UseScratchRegisterScope temps(this);
2351   Register actual_src_addr = liftoff::CalculateActualAddress(
2352       this, &temps, src_addr, offset_reg, offset_imm);
2353   *protected_load_pc = pc_offset();
2354   MachineType memtype = type.mem_type();
2355 
2356   if (transform == LoadTransformationKind::kExtend) {
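    // Load-extend: load a 64-bit vector and sign- or zero-extend each lane to
    // twice its width.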
2357     if (memtype == MachineType::Int8()) {
2358       vld1(Neon8, NeonListOperand(dst.low_fp()),
2359            NeonMemOperand(actual_src_addr));
2360       vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
2361     } else if (memtype == MachineType::Uint8()) {
2362       vld1(Neon8, NeonListOperand(dst.low_fp()),
2363            NeonMemOperand(actual_src_addr));
2364       vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
2365     } else if (memtype == MachineType::Int16()) {
2366       vld1(Neon16, NeonListOperand(dst.low_fp()),
2367            NeonMemOperand(actual_src_addr));
2368       vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
2369     } else if (memtype == MachineType::Uint16()) {
2370       vld1(Neon16, NeonListOperand(dst.low_fp()),
2371            NeonMemOperand(actual_src_addr));
2372       vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
2373     } else if (memtype == MachineType::Int32()) {
2374       vld1(Neon32, NeonListOperand(dst.low_fp()),
2375            NeonMemOperand(actual_src_addr));
2376       vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
2377     } else if (memtype == MachineType::Uint32()) {
2378       vld1(Neon32, NeonListOperand(dst.low_fp()),
2379            NeonMemOperand(actual_src_addr));
2380       vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
2381     }
2382   } else if (transform == LoadTransformationKind::kZeroExtend) {
2383     Simd128Register dest = liftoff::GetSimd128Register(dst);
2384     if (memtype == MachineType::Int32()) {
2385       vmov(dest, 0);
2386       vld1s(Neon32, NeonListOperand(dst.low_fp()), 0,
2387             NeonMemOperand(actual_src_addr));
2388     } else {
2389       DCHECK_EQ(MachineType::Int64(), memtype);
2390       vmov(dest.high(), 0);
2391       vld1(Neon64, NeonListOperand(dest.low()),
2392            NeonMemOperand(actual_src_addr));
2393     }
2394   } else {
2395     DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2396     if (memtype == MachineType::Int8()) {
2397       vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
2398             NeonMemOperand(actual_src_addr));
2399     } else if (memtype == MachineType::Int16()) {
2400       vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
2401             NeonMemOperand(actual_src_addr));
2402     } else if (memtype == MachineType::Int32()) {
2403       vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
2404             NeonMemOperand(actual_src_addr));
2405     } else if (memtype == MachineType::Int64()) {
2406       vld1(Neon32, NeonListOperand(dst.low_fp()),
2407            NeonMemOperand(actual_src_addr));
2408       TurboAssembler::Move(dst.high_fp(), dst.low_fp());
2409     }
2410   }
2411 }
2412 
2413 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2414                                 Register addr, Register offset_reg,
2415                                 uintptr_t offset_imm, LoadType type,
2416                                 uint8_t laneidx, uint32_t* protected_load_pc) {
2417   UseScratchRegisterScope temps(this);
2418   Register actual_src_addr = liftoff::CalculateActualAddress(
2419       this, &temps, addr, offset_reg, offset_imm);
2420   TurboAssembler::Move(liftoff::GetSimd128Register(dst),
2421                        liftoff::GetSimd128Register(src));
2422   *protected_load_pc = pc_offset();
2423   LoadStoreLaneParams load_params(type.mem_type().representation(), laneidx);
2424   NeonListOperand dst_op =
2425       NeonListOperand(load_params.low_op ? dst.low_fp() : dst.high_fp());
2426   TurboAssembler::LoadLane(load_params.sz, dst_op, load_params.laneidx,
2427                            NeonMemOperand(actual_src_addr));
2428 }
2429 
2430 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2431                                  uintptr_t offset_imm, LiftoffRegister src,
2432                                  StoreType type, uint8_t laneidx,
2433                                  uint32_t* protected_store_pc) {
2434   UseScratchRegisterScope temps(this);
2435   Register actual_dst_addr =
2436       liftoff::CalculateActualAddress(this, &temps, dst, offset, offset_imm);
2437   *protected_store_pc = pc_offset();
2438 
2439   LoadStoreLaneParams store_params(type.mem_rep(), laneidx);
2440   NeonListOperand src_op =
2441       NeonListOperand(store_params.low_op ? src.low_fp() : src.high_fp());
2442   TurboAssembler::StoreLane(store_params.sz, src_op, store_params.laneidx,
2443                             NeonMemOperand(actual_dst_addr));
2444 }
2445 
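// Note on the swizzle lowering below: NEON vtbl writes 0 for any index that is
// out of range for the table, which matches the Wasm i8x16.swizzle requirement
// that lane indices >= 16 select 0, so no extra clamping of rhs is needed.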
2446 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2447                                           LiftoffRegister lhs,
2448                                           LiftoffRegister rhs) {
2449   UseScratchRegisterScope temps(this);
2450 
2451   NeonListOperand table(liftoff::GetSimd128Register(lhs));
2452   if (dst == lhs) {
2453     // dst will be overwritten, so keep the table somewhere else.
2454     QwNeonRegister tbl = temps.AcquireQ();
2455     TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs));
2456     table = NeonListOperand(tbl);
2457   }
2458 
2459   vtbl(dst.low_fp(), table, rhs.low_fp());
2460   vtbl(dst.high_fp(), table, rhs.high_fp());
2461 }
2462 
2463 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2464                                         LiftoffRegister src) {
2465   TurboAssembler::Move(dst.low_fp(), src.fp());
2466   TurboAssembler::Move(dst.high_fp(), src.fp());
2467 }
2468 
2469 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
2470                                                LiftoffRegister lhs,
2471                                                uint8_t imm_lane_idx) {
2472   ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx);
2473 }
2474 
2475 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
2476                                                LiftoffRegister src1,
2477                                                LiftoffRegister src2,
2478                                                uint8_t imm_lane_idx) {
2479   ReplaceLane(liftoff::GetSimd128Register(dst),
2480               liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx);
2481 }
2482 
2483 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
2484                                       LiftoffRegister src) {
2485   vabs(dst.low_fp(), src.low_fp());
2486   vabs(dst.high_fp(), src.high_fp());
2487 }
2488 
2489 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
2490                                       LiftoffRegister src) {
2491   vneg(dst.low_fp(), src.low_fp());
2492   vneg(dst.high_fp(), src.high_fp());
2493 }
2494 
2495 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
2496                                        LiftoffRegister src) {
2497   vsqrt(dst.low_fp(), src.low_fp());
2498   vsqrt(dst.high_fp(), src.high_fp());
2499 }
2500 
2501 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
2502                                        LiftoffRegister src) {
2503   if (!CpuFeatures::IsSupported(ARMv8)) {
2504     return false;
2505   }
2506 
2507   CpuFeatureScope scope(this, ARMv8);
2508   vrintp(dst.low_fp(), src.low_fp());
2509   vrintp(dst.high_fp(), src.high_fp());
2510   return true;
2511 }
2512 
2513 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
2514                                         LiftoffRegister src) {
2515   if (!CpuFeatures::IsSupported(ARMv8)) {
2516     return false;
2517   }
2518 
2519   CpuFeatureScope scope(this, ARMv8);
2520   vrintm(dst.low_fp(), src.low_fp());
2521   vrintm(dst.high_fp(), src.high_fp());
2522   return true;
2523 }
2524 
2525 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
2526                                         LiftoffRegister src) {
2527   if (!CpuFeatures::IsSupported(ARMv8)) {
2528     return false;
2529   }
2530 
2531   CpuFeatureScope scope(this, ARMv8);
2532   vrintz(dst.low_fp(), src.low_fp());
2533   vrintz(dst.high_fp(), src.high_fp());
2534   return true;
2535 }
2536 
2537 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
2538                                               LiftoffRegister src) {
2539   if (!CpuFeatures::IsSupported(ARMv8)) {
2540     return false;
2541   }
2542 
2543   CpuFeatureScope scope(this, ARMv8);
2544   vrintn(dst.low_fp(), src.low_fp());
2545   vrintn(dst.high_fp(), src.high_fp());
2546   return true;
2547 }
2548 
2549 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2550                                       LiftoffRegister rhs) {
2551   vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2552   vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2553 }
2554 
2555 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2556                                       LiftoffRegister rhs) {
2557   vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2558   vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2559 }
2560 
2561 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2562                                       LiftoffRegister rhs) {
2563   vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2564   vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2565 }
2566 
2567 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
2568                                       LiftoffRegister rhs) {
2569   vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2570   vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2571 }
2572 
2573 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
2574                                       LiftoffRegister rhs) {
2575   Simd128Register dest = liftoff::GetSimd128Register(dst);
2576   Simd128Register left = liftoff::GetSimd128Register(lhs);
2577   Simd128Register right = liftoff::GetSimd128Register(rhs);
2578 
2579   liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2580                              liftoff::MinOrMax::kMin);
2581   liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2582                              liftoff::MinOrMax::kMin);
2583 }
2584 
2585 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
2586                                       LiftoffRegister rhs) {
2587   Simd128Register dest = liftoff::GetSimd128Register(dst);
2588   Simd128Register left = liftoff::GetSimd128Register(lhs);
2589   Simd128Register right = liftoff::GetSimd128Register(rhs);
2590 
2591   liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2592                              liftoff::MinOrMax::kMax);
2593   liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2594                              liftoff::MinOrMax::kMax);
2595 }
2596 
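// Wasm pseudo-min: pmin(a, b) selects b if b < a, else a (no NaN or -0.0
// canonicalization). The compare below sets flags for rhs < lhs, and the
// conditional vmov copies the rhs lane only in that case; pmax mirrors this
// with the gt condition.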
2597 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2598                                        LiftoffRegister rhs) {
2599   QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2600   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2601   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2602 
2603   if (dst != rhs) {
2604     vmov(dest, left);
2605   }
2606 
2607   VFPCompareAndSetFlags(right.low(), left.low());
2608   vmov(dest.low(), right.low(), mi);
2609   VFPCompareAndSetFlags(right.high(), left.high());
2610   vmov(dest.high(), right.high(), mi);
2611 }
2612 
2613 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2614                                        LiftoffRegister rhs) {
2615   QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2616   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2617   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2618 
2619   if (dst != rhs) {
2620     vmov(dest, left);
2621   }
2622 
2623   VFPCompareAndSetFlags(right.low(), left.low());
2624   vmov(dest.low(), right.low(), gt);
2625   VFPCompareAndSetFlags(right.high(), left.high());
2626   vmov(dest.high(), right.high(), gt);
2627 }
2628 
2629 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
2630                                                       LiftoffRegister src) {
2631   F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst),
2632                         liftoff::GetSimd128Register(src));
2633 }
2634 
2635 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
2636                                                       LiftoffRegister src) {
2637   F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst),
2638                         liftoff::GetSimd128Register(src));
2639 }
2640 
2641 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
2642                                                     LiftoffRegister src) {
2643   F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst),
2644                        liftoff::GetSimd128Register(src));
2645 }
2646 
2647 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2648                                         LiftoffRegister src) {
2649   vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
2650 }
2651 
2652 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
2653                                                LiftoffRegister lhs,
2654                                                uint8_t imm_lane_idx) {
2655   ExtractLane(liftoff::GetFloatRegister(dst.fp()),
2656               liftoff::GetSimd128Register(lhs), imm_lane_idx);
2657 }
2658 
2659 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
2660                                                LiftoffRegister src1,
2661                                                LiftoffRegister src2,
2662                                                uint8_t imm_lane_idx) {
2663   ReplaceLane(liftoff::GetSimd128Register(dst),
2664               liftoff::GetSimd128Register(src1),
2665               liftoff::GetFloatRegister(src2.fp()), imm_lane_idx);
2666 }
2667 
2668 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
2669                                       LiftoffRegister src) {
2670   vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2671 }
2672 
2673 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
2674                                       LiftoffRegister src) {
2675   vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2676 }
2677 
2678 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
2679                                        LiftoffRegister src) {
2680   // The d registers available to us are restricted to d0-d15, each of which
2681   // aliases two s registers, so every f32 lane can be addressed as an s register.
2682   LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2683   LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code());
2684 
2685   LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2686   LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code());
2687 
2688   vsqrt(dst_low.low(), src_low.low());
2689   vsqrt(dst_low.high(), src_low.high());
2690   vsqrt(dst_high.low(), src_high.low());
2691   vsqrt(dst_high.high(), src_high.high());
2692 }
2693 
2694 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
2695                                        LiftoffRegister src) {
2696   if (!CpuFeatures::IsSupported(ARMv8)) {
2697     return false;
2698   }
2699 
2700   CpuFeatureScope scope(this, ARMv8);
2701   vrintp(NeonS32, liftoff::GetSimd128Register(dst),
2702          liftoff::GetSimd128Register(src));
2703   return true;
2704 }
2705 
2706 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
2707                                         LiftoffRegister src) {
2708   if (!CpuFeatures::IsSupported(ARMv8)) {
2709     return false;
2710   }
2711 
2712   CpuFeatureScope scope(this, ARMv8);
2713   vrintm(NeonS32, liftoff::GetSimd128Register(dst),
2714          liftoff::GetSimd128Register(src));
2715   return true;
2716 }
2717 
2718 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
2719                                         LiftoffRegister src) {
2720   if (!CpuFeatures::IsSupported(ARMv8)) {
2721     return false;
2722   }
2723 
2724   CpuFeatureScope scope(this, ARMv8);
2725   vrintz(NeonS32, liftoff::GetSimd128Register(dst),
2726          liftoff::GetSimd128Register(src));
2727   return true;
2728 }
2729 
2730 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
2731                                               LiftoffRegister src) {
2732   if (!CpuFeatures::IsSupported(ARMv8)) {
2733     return false;
2734   }
2735 
2736   CpuFeatureScope scope(this, ARMv8);
2737   vrintn(NeonS32, liftoff::GetSimd128Register(dst),
2738          liftoff::GetSimd128Register(src));
2739   return true;
2740 }
2741 
2742 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2743                                       LiftoffRegister rhs) {
2744   vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2745        liftoff::GetSimd128Register(rhs));
2746 }
2747 
2748 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2749                                       LiftoffRegister rhs) {
2750   vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2751        liftoff::GetSimd128Register(rhs));
2752 }
2753 
2754 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2755                                       LiftoffRegister rhs) {
2756   vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2757        liftoff::GetSimd128Register(rhs));
2758 }
2759 
2760 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
2761                                       LiftoffRegister rhs) {
2762   // The d registers available to us are restricted to d0-d15, each of which
2763   // aliases two s registers, so every f32 lane can be addressed as an s register.
2764   LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2765   LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code());
2766   LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code());
2767 
2768   LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2769   LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code());
2770   LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code());
2771 
2772   vdiv(dst_low.low(), lhs_low.low(), rhs_low.low());
2773   vdiv(dst_low.high(), lhs_low.high(), rhs_low.high());
2774   vdiv(dst_high.low(), lhs_high.low(), rhs_high.low());
2775   vdiv(dst_high.high(), lhs_high.high(), rhs_high.high());
2776 }
2777 
2778 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
2779                                       LiftoffRegister rhs) {
2780   vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2781        liftoff::GetSimd128Register(rhs));
2782 }
2783 
2784 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
2785                                       LiftoffRegister rhs) {
2786   vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2787        liftoff::GetSimd128Register(rhs));
2788 }
2789 
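// f32x4 pseudo-min via NEON: vcgt builds a per-lane mask of lanes where
// lhs > rhs, and vbsl then selects the rhs lane under that mask and the lhs
// lane otherwise, i.e. dst = (rhs < lhs) ? rhs : lhs per lane.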
2790 void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2791                                        LiftoffRegister rhs) {
2792   UseScratchRegisterScope temps(this);
2793 
2794   QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
2795   if (dst == lhs || dst == rhs) {
2796     tmp = temps.AcquireQ();
2797   }
2798 
2799   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2800   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2801   vcgt(tmp, left, right);
2802   vbsl(tmp, right, left);
2803 
2804   if (dst == lhs || dst == rhs) {
2805     vmov(liftoff::GetSimd128Register(dst), tmp);
2806   }
2807 }
2808 
2809 void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2810                                        LiftoffRegister rhs) {
2811   UseScratchRegisterScope temps(this);
2812 
2813   QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
2814   if (dst == lhs || dst == rhs) {
2815     tmp = temps.AcquireQ();
2816   }
2817 
2818   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2819   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2820   vcgt(tmp, right, left);
2821   vbsl(tmp, right, left);
2822 
2823   if (dst == lhs || dst == rhs) {
2824     vmov(liftoff::GetSimd128Register(dst), tmp);
2825   }
2826 }
2827 
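// On 32-bit ARM an i64 value lives in a GP register pair, so the 64-bit splat
// is assembled from 32-bit pieces: dup the low word into all four 32-bit
// lanes, then write the high word into 32-bit lanes 1 and 3, which are the
// upper halves of the two 64-bit lanes.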
2828 void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
2829                                         LiftoffRegister src) {
2830   Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
2831   vdup(Neon32, dst_simd, src.low_gp());
2832   ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1);
2833   ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3);
2834 }
2835 
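// A 64-bit lane is read as two 32-bit lanes (2 * idx and 2 * idx + 1) into the
// low and high halves of the destination GP register pair.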
2836 void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
2837                                                LiftoffRegister lhs,
2838                                                uint8_t imm_lane_idx) {
2839   ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
2840               imm_lane_idx * 2);
2841   ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
2842               imm_lane_idx * 2 + 1);
2843 }
2844 
2845 void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
2846                                                LiftoffRegister src1,
2847                                                LiftoffRegister src2,
2848                                                uint8_t imm_lane_idx) {
2849   Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
2850   Simd128Register src1_simd = liftoff::GetSimd128Register(src1);
2851   ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2);
2852   ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32,
2853               imm_lane_idx * 2 + 1);
2854 }
2855 
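// NEON has no 64-bit integer negate, so negate by subtracting from a zeroed
// register; dst itself can serve as the zero unless it aliases src.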
2856 void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
2857                                       LiftoffRegister src) {
2858   UseScratchRegisterScope temps(this);
2859   QwNeonRegister zero =
2860       dst == src ? temps.AcquireQ() : liftoff::GetSimd128Register(dst);
2861   vmov(zero, uint64_t{0});
2862   vsub(Neon64, liftoff::GetSimd128Register(dst), zero,
2863        liftoff::GetSimd128Register(src));
2864 }
2865 
2866 void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
2867                                           LiftoffRegister src) {
2868   I64x2AllTrue(dst.gp(), liftoff::GetSimd128Register(src));
2869 }
2870 
2871 void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
2872                                       LiftoffRegister rhs) {
2873   liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs);
2874 }
2875 
2876 void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
2877                                        int32_t rhs) {
2878   vshl(NeonS64, liftoff::GetSimd128Register(dst),
2879        liftoff::GetSimd128Register(lhs), rhs & 63);
2880 }
2881 
2882 void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
2883                                         LiftoffRegister lhs,
2884                                         LiftoffRegister rhs) {
2885   liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs);
2886 }
2887 
2888 void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
2889                                          LiftoffRegister lhs, int32_t rhs) {
2890   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs,
2891                                                             rhs);
2892 }
2893 
2894 void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
2895                                         LiftoffRegister lhs,
2896                                         LiftoffRegister rhs) {
2897   liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs);
2898 }
2899 
2900 void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
2901                                          LiftoffRegister lhs, int32_t rhs) {
2902   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs,
2903                                                             rhs);
2904 }
2905 
2906 void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2907                                       LiftoffRegister rhs) {
2908   vadd(Neon64, liftoff::GetSimd128Register(dst),
2909        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
2910 }
2911 
2912 void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2913                                       LiftoffRegister rhs) {
2914   vsub(Neon64, liftoff::GetSimd128Register(dst),
2915        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
2916 }
2917 
2918 void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2919                                       LiftoffRegister rhs) {
2920   UseScratchRegisterScope temps(this);
2921 
2922   QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst);
2923   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2924   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2925 
2926   // These temporary registers will be modified. We can directly modify lhs and
2927   // rhs if they are not used, saving on temporaries.
2928   QwNeonRegister tmp1 = left;
2929   QwNeonRegister tmp2 = right;
2930 
2931   LiftoffRegList used_plus_dst =
2932       cache_state()->used_registers | LiftoffRegList{dst};
2933 
2934   if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
2935     tmp1 = temps.AcquireQ();
2936     // We only have 1 scratch Q register, so acquire another ourselves.
2937     LiftoffRegList pinned = {dst};
2938     LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
2939     tmp2 = liftoff::GetSimd128Register(unused_pair);
2940   } else if (used_plus_dst.has(lhs)) {
2941     tmp1 = temps.AcquireQ();
2942   } else if (used_plus_dst.has(rhs)) {
2943     tmp2 = temps.AcquireQ();
2944   }
2945 
2946   // Algorithm from code-generator-arm.cc, refer to comments there for details.
2947   if (tmp1 != left) {
2948     vmov(tmp1, left);
2949   }
2950   if (tmp2 != right) {
2951     vmov(tmp2, right);
2952   }
2953 
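  // Per 64-bit lane, with a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo:
  //   a * b mod 2^64 = ((a_lo * b_hi + a_hi * b_lo) << 32) + a_lo * b_lo.
  // The vtrn pair below regroups each operand so that one D register holds the
  // low words of both lanes and the other holds the high words, letting
  // vmull/vmlal form the three 32x32->64 partial products lane-wise.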
2954   vtrn(Neon32, tmp1.low(), tmp1.high());
2955   vtrn(Neon32, tmp2.low(), tmp2.high());
2956 
2957   vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high());
2958   vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low());
2959   vshl(NeonU64, dst_neon, dst_neon, 32);
2960 
2961   vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low());
2962 }
2963 
2964 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
2965                                                      LiftoffRegister src1,
2966                                                      LiftoffRegister src2) {
2967   vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.low_fp(),
2968         src2.low_fp());
2969 }
2970 
2971 void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
2972                                                      LiftoffRegister src1,
2973                                                      LiftoffRegister src2) {
2974   vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.low_fp(),
2975         src2.low_fp());
2976 }
2977 
2978 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
2979                                                       LiftoffRegister src1,
2980                                                       LiftoffRegister src2) {
2981   vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.high_fp(),
2982         src2.high_fp());
2983 }
2984 
2985 void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
2986                                                       LiftoffRegister src1,
2987                                                       LiftoffRegister src2) {
2988   vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.high_fp(),
2989         src2.high_fp());
2990 }
2991 
2992 void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
2993                                           LiftoffRegister src) {
2994   I64x2BitMask(dst.gp(), liftoff::GetSimd128Register(src));
2995 }
2996 
2997 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
2998                                                      LiftoffRegister src) {
2999   vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.low_fp());
3000 }
3001 
3002 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
3003                                                       LiftoffRegister src) {
3004   vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.high_fp());
3005 }
3006 
3007 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
3008                                                      LiftoffRegister src) {
3009   vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.low_fp());
3010 }
3011 
3012 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
3013                                                       LiftoffRegister src) {
3014   vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.high_fp());
3015 }
3016 
3017 void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
3018                                         LiftoffRegister src) {
3019   vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
3020 }
3021 
3022 void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
3023                                                LiftoffRegister lhs,
3024                                                uint8_t imm_lane_idx) {
3025   ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32,
3026               imm_lane_idx);
3027 }
3028 
3029 void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
3030                                                LiftoffRegister src1,
3031                                                LiftoffRegister src2,
3032                                                uint8_t imm_lane_idx) {
3033   ReplaceLane(liftoff::GetSimd128Register(dst),
3034               liftoff::GetSimd128Register(src1), src2.gp(), NeonS32,
3035               imm_lane_idx);
3036 }
3037 
3038 void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
3039                                       LiftoffRegister src) {
3040   vneg(Neon32, liftoff::GetSimd128Register(dst),
3041        liftoff::GetSimd128Register(src));
3042 }
3043 
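// all_true is computed as an unsigned pairwise-min reduction: if any 32-bit
// lane is zero the minimum is zero, and the final cmp/mov materializes the
// boolean 0/1 in the destination GP register.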
3044 void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
3045                                           LiftoffRegister src) {
3046   UseScratchRegisterScope temps(this);
3047   DwVfpRegister scratch = temps.AcquireD();
3048   vpmin(NeonU32, scratch, src.low_fp(), src.high_fp());
3049   vpmin(NeonU32, scratch, scratch, scratch);
3050   ExtractLane(dst.gp(), scratch, NeonS32, 0);
3051   cmp(dst.gp(), Operand(0));
3052   mov(dst.gp(), Operand(1), LeaveCC, ne);
3053 }
3054 
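// i32x4.bitmask: arithmetic shift right by 31 turns each lane into all-ones
// (if the sign bit was set) or all-zeros, the two mask constants put 1 << i
// into lane i, and the pairwise adds then sum the surviving bits. Worked
// example: lanes {-1, 0, 0, -7} -> after vshr {~0, 0, 0, ~0} -> after vand
// {1, 0, 0, 8} -> bitmask 0b1001 = 9.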
3055 void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
3056                                           LiftoffRegister src) {
3057   UseScratchRegisterScope temps(this);
3058   Simd128Register tmp = liftoff::GetSimd128Register(src);
3059   Simd128Register mask = temps.AcquireQ();
3060 
3061   if (cache_state()->is_used(src)) {
3062     // We only have 1 scratch Q register, so try and reuse src.
3063     LiftoffRegList pinned = {src};
3064     LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3065     mask = liftoff::GetSimd128Register(unused_pair);
3066   }
3067 
3068   vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
3069   // Set bit i in lane i. After ANDing with tmp, a lane whose sign bit was
3070   // set keeps bit i; all other lanes become 0.
3071   vmov(mask.low(), base::Double((uint64_t)0x0000'0002'0000'0001));
3072   vmov(mask.high(), base::Double((uint64_t)0x0000'0008'0000'0004));
3073   vand(tmp, mask, tmp);
3074   vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
3075   vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
3076   VmovLow(dst.gp(), tmp.low());
3077 }
3078 
3079 void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
3080                                       LiftoffRegister rhs) {
3081   liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs);
3082 }
3083 
3084 void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
3085                                        int32_t rhs) {
3086   vshl(NeonS32, liftoff::GetSimd128Register(dst),
3087        liftoff::GetSimd128Register(lhs), rhs & 31);
3088 }
3089 
3090 void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
3091                                         LiftoffRegister lhs,
3092                                         LiftoffRegister rhs) {
3093   liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs);
3094 }
3095 
3096 void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
3097                                          LiftoffRegister lhs, int32_t rhs) {
3098   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs,
3099                                                             rhs);
3100 }
3101 
3102 void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
3103                                         LiftoffRegister lhs,
3104                                         LiftoffRegister rhs) {
3105   liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs);
3106 }
3107 
3108 void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
3109                                          LiftoffRegister lhs, int32_t rhs) {
3110   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs,
3111                                                             rhs);
3112 }
3113 
3114 void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
3115                                       LiftoffRegister rhs) {
3116   vadd(Neon32, liftoff::GetSimd128Register(dst),
3117        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3118 }
3119 
3120 void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
3121                                       LiftoffRegister rhs) {
3122   vsub(Neon32, liftoff::GetSimd128Register(dst),
3123        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3124 }
3125 
3126 void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
3127                                       LiftoffRegister rhs) {
3128   vmul(Neon32, liftoff::GetSimd128Register(dst),
3129        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3130 }
3131 
3132 void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
3133                                         LiftoffRegister lhs,
3134                                         LiftoffRegister rhs) {
3135   vmin(NeonS32, liftoff::GetSimd128Register(dst),
3136        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3137 }
3138 
3139 void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
3140                                         LiftoffRegister lhs,
3141                                         LiftoffRegister rhs) {
3142   vmin(NeonU32, liftoff::GetSimd128Register(dst),
3143        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3144 }
3145 
3146 void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
3147                                         LiftoffRegister lhs,
3148                                         LiftoffRegister rhs) {
3149   vmax(NeonS32, liftoff::GetSimd128Register(dst),
3150        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3151 }
3152 
3153 void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
3154                                         LiftoffRegister lhs,
3155                                         LiftoffRegister rhs) {
3156   vmax(NeonU32, liftoff::GetSimd128Register(dst),
3157        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3158 }
3159 
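// i32x4.dot_i16x8_s: vmull widens the signed 16-bit products to 32 bits and
// vpadd then adds adjacent pairs, so each output lane is
// lhs[2i] * rhs[2i] + lhs[2i + 1] * rhs[2i + 1].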
3160 void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
3161                                               LiftoffRegister lhs,
3162                                               LiftoffRegister rhs) {
3163   QwNeonRegister dest = liftoff::GetSimd128Register(dst);
3164   QwNeonRegister left = liftoff::GetSimd128Register(lhs);
3165   QwNeonRegister right = liftoff::GetSimd128Register(rhs);
3166 
3167   UseScratchRegisterScope temps(this);
3168   Simd128Register scratch = temps.AcquireQ();
3169 
3170   vmull(NeonS16, scratch, left.low(), right.low());
3171   vpadd(Neon32, dest.low(), scratch.low(), scratch.high());
3172 
3173   vmull(NeonS16, scratch, left.high(), right.high());
3174   vpadd(Neon32, dest.high(), scratch.low(), scratch.high());
3175 }
3176 
3177 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
3178                                                           LiftoffRegister src) {
3179   vpaddl(NeonS16, liftoff::GetSimd128Register(dst),
3180          liftoff::GetSimd128Register(src));
3181 }
3182 
3183 void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
3184                                                           LiftoffRegister src) {
3185   vpaddl(NeonU16, liftoff::GetSimd128Register(dst),
3186          liftoff::GetSimd128Register(src));
3187 }
3188 
3189 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
3190                                                      LiftoffRegister src1,
3191                                                      LiftoffRegister src2) {
3192   vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.low_fp(),
3193         src2.low_fp());
3194 }
3195 
3196 void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
3197                                                      LiftoffRegister src1,
3198                                                      LiftoffRegister src2) {
3199   vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.low_fp(),
3200         src2.low_fp());
3201 }
3202 
3203 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
3204                                                       LiftoffRegister src1,
3205                                                       LiftoffRegister src2) {
3206   vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.high_fp(),
3207         src2.high_fp());
3208 }
3209 
3210 void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
3211                                                       LiftoffRegister src1,
3212                                                       LiftoffRegister src2) {
3213   vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.high_fp(),
3214         src2.high_fp());
3215 }
3216 
3217 void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
3218                                         LiftoffRegister src) {
3219   vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp());
3220 }
3221 
3222 void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
3223                                       LiftoffRegister src) {
3224   vneg(Neon16, liftoff::GetSimd128Register(dst),
3225        liftoff::GetSimd128Register(src));
3226 }
3227 
3228 void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
3229                                           LiftoffRegister src) {
3230   UseScratchRegisterScope temps(this);
3231   DwVfpRegister scratch = temps.AcquireD();
3232   vpmin(NeonU16, scratch, src.low_fp(), src.high_fp());
3233   vpmin(NeonU16, scratch, scratch, scratch);
3234   vpmin(NeonU16, scratch, scratch, scratch);
3235   ExtractLane(dst.gp(), scratch, NeonS16, 0);
3236   cmp(dst.gp(), Operand(0));
3237   mov(dst.gp(), Operand(1), LeaveCC, ne);
3238 }
3239 
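// Same scheme as i32x4_bitmask above, but with eight 16-bit lanes: shift right
// by 15 to smear the sign bit, AND with per-lane constants 1 << i, then three
// rounds of pairwise adds collapse the eight lanes into the final 8-bit mask.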
3240 void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
3241                                           LiftoffRegister src) {
3242   UseScratchRegisterScope temps(this);
3243   Simd128Register tmp = liftoff::GetSimd128Register(src);
3244   Simd128Register mask = temps.AcquireQ();
3245 
3246   if (cache_state()->is_used(src)) {
3247     // We only have 1 scratch Q register, so try and reuse src.
3248     LiftoffRegList pinned = {src};
3249     LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3250     mask = liftoff::GetSimd128Register(unused_pair);
3251   }
3252 
3253   vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
3254   // Set bit i in lane i. After ANDing with tmp, a lane whose sign bit was
3255   // set keeps bit i; all other lanes become 0.
3256   vmov(mask.low(), base::Double((uint64_t)0x0008'0004'0002'0001));
3257   vmov(mask.high(), base::Double((uint64_t)0x0080'0040'0020'0010));
3258   vand(tmp, mask, tmp);
3259   vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
3260   vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3261   vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3262   vmov(NeonU16, dst.gp(), tmp.low(), 0);
3263 }
3264 
3265 void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
3266                                       LiftoffRegister rhs) {
3267   liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs);
3268 }
3269 
3270 void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
3271                                        int32_t rhs) {
3272   vshl(NeonS16, liftoff::GetSimd128Register(dst),
3273        liftoff::GetSimd128Register(lhs), rhs & 15);
3274 }
3275 
3276 void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
3277                                         LiftoffRegister lhs,
3278                                         LiftoffRegister rhs) {
3279   liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs);
3280 }
3281 
3282 void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
3283                                          LiftoffRegister lhs, int32_t rhs) {
3284   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs,
3285                                                             rhs);
3286 }
3287 
3288 void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
3289                                         LiftoffRegister lhs,
3290                                         LiftoffRegister rhs) {
3291   liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs);
3292 }
3293 
3294 void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
3295                                          LiftoffRegister lhs, int32_t rhs) {
3296   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs,
3297                                                             rhs);
3298 }
3299 
3300 void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
3301                                       LiftoffRegister rhs) {
3302   vadd(Neon16, liftoff::GetSimd128Register(dst),
3303        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3304 }
3305 
3306 void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
3307                                             LiftoffRegister lhs,
3308                                             LiftoffRegister rhs) {
3309   vqadd(NeonS16, liftoff::GetSimd128Register(dst),
3310         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3311 }
3312 
3313 void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
3314                                       LiftoffRegister rhs) {
3315   vsub(Neon16, liftoff::GetSimd128Register(dst),
3316        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3317 }
3318 
3319 void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
3320                                             LiftoffRegister lhs,
3321                                             LiftoffRegister rhs) {
3322   vqsub(NeonS16, liftoff::GetSimd128Register(dst),
3323         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3324 }
3325 
3326 void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
3327                                             LiftoffRegister lhs,
3328                                             LiftoffRegister rhs) {
3329   vqsub(NeonU16, liftoff::GetSimd128Register(dst),
3330         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3331 }
3332 
3333 void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
3334                                       LiftoffRegister rhs) {
3335   vmul(Neon16, liftoff::GetSimd128Register(dst),
3336        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3337 }
3338 
3339 void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
3340                                             LiftoffRegister lhs,
3341                                             LiftoffRegister rhs) {
3342   vqadd(NeonU16, liftoff::GetSimd128Register(dst),
3343         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3344 }
3345 
3346 void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
3347                                         LiftoffRegister lhs,
3348                                         LiftoffRegister rhs) {
3349   vmin(NeonS16, liftoff::GetSimd128Register(dst),
3350        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3351 }
3352 
3353 void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
3354                                         LiftoffRegister lhs,
3355                                         LiftoffRegister rhs) {
3356   vmin(NeonU16, liftoff::GetSimd128Register(dst),
3357        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3358 }
3359 
3360 void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
3361                                         LiftoffRegister lhs,
3362                                         LiftoffRegister rhs) {
3363   vmax(NeonS16, liftoff::GetSimd128Register(dst),
3364        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3365 }
3366 
3367 void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
3368                                         LiftoffRegister lhs,
3369                                         LiftoffRegister rhs) {
3370   vmax(NeonU16, liftoff::GetSimd128Register(dst),
3371        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3372 }
3373 
3374 void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
3375                                                  LiftoffRegister lhs,
3376                                                  uint8_t imm_lane_idx) {
3377   ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16,
3378               imm_lane_idx);
3379 }
3380 
3381 void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
3382                                                  LiftoffRegister lhs,
3383                                                  uint8_t imm_lane_idx) {
3384   ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16,
3385               imm_lane_idx);
3386 }
3387 
3388 void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
3389                                                LiftoffRegister src1,
3390                                                LiftoffRegister src2,
3391                                                uint8_t imm_lane_idx) {
3392   ReplaceLane(liftoff::GetSimd128Register(dst),
3393               liftoff::GetSimd128Register(src1), src2.gp(), NeonS16,
3394               imm_lane_idx);
3395 }
3396 
3397 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
3398                                                           LiftoffRegister src) {
3399   vpaddl(NeonS8, liftoff::GetSimd128Register(dst),
3400          liftoff::GetSimd128Register(src));
3401 }
3402 
3403 void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
3404                                                           LiftoffRegister src) {
3405   vpaddl(NeonU8, liftoff::GetSimd128Register(dst),
3406          liftoff::GetSimd128Register(src));
3407 }
3408 
3409 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
3410                                                      LiftoffRegister src1,
3411                                                      LiftoffRegister src2) {
3412   vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
3413 }
3414 
3415 void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
3416                                                      LiftoffRegister src1,
3417                                                      LiftoffRegister src2) {
3418   vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
3419 }
3420 
3421 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
3422                                                       LiftoffRegister src1,
3423                                                       LiftoffRegister src2) {
3424   vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.high_fp(),
3425         src2.high_fp());
3426 }
3427 
3428 void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
3429                                                       LiftoffRegister src1,
3430                                                       LiftoffRegister src2) {
3431   vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.high_fp(),
3432         src2.high_fp());
3433 }
3434 
3435 void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
3436                                                 LiftoffRegister src1,
3437                                                 LiftoffRegister src2) {
3438   vqrdmulh(NeonS16, liftoff::GetSimd128Register(dst),
3439            liftoff::GetSimd128Register(src1),
3440            liftoff::GetSimd128Register(src2));
3441 }
3442 
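// General i8x16 shuffle via vtbl: the 16 shuffle indices are packed four bytes
// at a time into the scratch Q register (one 32-bit immediate per S register),
// and the input vectors, moved to consecutive registers when needed, form the
// lookup table for the two vtbl instructions at the end.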
3443 void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
3444                                           LiftoffRegister lhs,
3445                                           LiftoffRegister rhs,
3446                                           const uint8_t shuffle[16],
3447                                           bool is_swizzle) {
3448   Simd128Register dest = liftoff::GetSimd128Register(dst);
3449   Simd128Register src1 = liftoff::GetSimd128Register(lhs);
3450   Simd128Register src2 = liftoff::GetSimd128Register(rhs);
3451   UseScratchRegisterScope temps(this);
3452   Simd128Register scratch = temps.AcquireQ();
3453   if ((src1 != src2) && src1.code() + 1 != src2.code()) {
3454     // vtbl requires the table operands to be consecutive registers or the same
3455     // register. If src1 and src2 are the same, we build a smaller list operand
3456     // (table_size = 2) below. If they are different and not consecutive, we
3457     // move src1 and src2 to q14 and q15, which will be unused since they are
3458     // not allocatable in Liftoff.
3460     static_assert(!kLiftoffAssemblerFpCacheRegs.has(d28),
3461                   "This only works if q14-q15 (d28-d31) are not used.");
3462     static_assert(!kLiftoffAssemblerFpCacheRegs.has(d29),
3463                   "This only works if q14-q15 (d28-d31) are not used.");
3464     static_assert(!kLiftoffAssemblerFpCacheRegs.has(d30),
3465                   "This only works if q14-q15 (d28-d31) are not used.");
3466     static_assert(!kLiftoffAssemblerFpCacheRegs.has(d31),
3467                   "This only works if q14-q15 (d28-d31) are not used.");
3468     vmov(q14, src1);
3469     src1 = q14;
3470     vmov(q15, src2);
3471     src2 = q15;
3472   }
3473 
3474   int table_size = src1 == src2 ? 2 : 4;
3475 
3476   int scratch_s_base = scratch.code() * 4;
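  // Pack the 16 shuffle indices into four 32-bit words and load them into the
  // four S registers aliasing the scratch Q register; this builds the index
  // vector that vtbl uses to pick bytes out of the table below.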
3477   for (int j = 0; j < 4; j++) {
3478     uint32_t imm = 0;
3479     for (int i = 3; i >= 0; i--) {
3480       imm = (imm << 8) | shuffle[j * 4 + i];
3481     }
3482     DCHECK_EQ(0, imm & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
3483     // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
3484     vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm));
3485   }
3486 
3487   DwVfpRegister table_base = src1.low();
3488   NeonListOperand table(table_base, table_size);
3489 
3490   if (dest != src1 && dest != src2) {
3491     vtbl(dest.low(), table, scratch.low());
3492     vtbl(dest.high(), table, scratch.high());
3493   } else {
3494     vtbl(scratch.low(), table, scratch.low());
3495     vtbl(scratch.high(), table, scratch.high());
3496     vmov(dest, scratch);
3497   }
3498 }
3499 
3500 void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
3501                                          LiftoffRegister src) {
3502   vcnt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
3503 }
3504 
3505 void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
3506                                         LiftoffRegister src) {
3507   vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
3508 }
3509 
3510 void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
3511                                                  LiftoffRegister lhs,
3512                                                  uint8_t imm_lane_idx) {
3513   ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx);
3514 }
3515 
3516 void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
3517                                                  LiftoffRegister lhs,
3518                                                  uint8_t imm_lane_idx) {
3519   ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx);
3520 }
3521 
3522 void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
3523                                                LiftoffRegister src1,
3524                                                LiftoffRegister src2,
3525                                                uint8_t imm_lane_idx) {
3526   ReplaceLane(liftoff::GetSimd128Register(dst),
3527               liftoff::GetSimd128Register(src1), src2.gp(), NeonS8,
3528               imm_lane_idx);
3529 }
3530 
3531 void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
3532                                       LiftoffRegister src) {
3533   vneg(Neon8, liftoff::GetSimd128Register(dst),
3534        liftoff::GetSimd128Register(src));
3535 }
3536 
3537 void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
3538                                          LiftoffRegister src) {
3539   liftoff::EmitAnyTrue(this, dst, src);
3540 }
3541 
3542 void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
3543                                           LiftoffRegister src) {
3544   UseScratchRegisterScope temps(this);
3545   DwVfpRegister scratch = temps.AcquireD();
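  // Reduce the 16 lanes with pairwise unsigned minima: the result is non-zero
  // only if every input lane was non-zero, and is then turned into 0 or 1.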
3546   vpmin(NeonU8, scratch, src.low_fp(), src.high_fp());
3547   vpmin(NeonU8, scratch, scratch, scratch);
3548   vpmin(NeonU8, scratch, scratch, scratch);
3549   vpmin(NeonU8, scratch, scratch, scratch);
3550   ExtractLane(dst.gp(), scratch, NeonS8, 0);
3551   cmp(dst.gp(), Operand(0));
3552   mov(dst.gp(), Operand(1), LeaveCC, ne);
3553 }
3554 
3555 void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
3556                                           LiftoffRegister src) {
3557   UseScratchRegisterScope temps(this);
3558   Simd128Register tmp = liftoff::GetSimd128Register(src);
3559   Simd128Register mask = temps.AcquireQ();
3560 
3561   if (cache_state()->is_used(src)) {
3562     // We only have 1 scratch Q register, so try and reuse src.
3563     LiftoffRegList pinned = {src};
3564     LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
3565     mask = liftoff::GetSimd128Register(unused_pair);
3566   }
3567 
3568   vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
3569   // Set the i-th bit of lane i. After ANDing with tmp, a lane whose sign
3570   // bit was set keeps its i-th bit; all other lanes become 0.
3571   vmov(mask.low(), base::Double((uint64_t)0x8040'2010'0804'0201));
3572   vmov(mask.high(), base::Double((uint64_t)0x8040'2010'0804'0201));
3573   vand(tmp, mask, tmp);
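  // Combine the per-lane bits: vext/vzip pair up byte j of the low half with
  // byte j of the high half into 16-bit lanes, then three pairwise adds
  // accumulate all of them into the lowest halfword, the final 16-bit mask.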
3574   vext(mask, tmp, tmp, 8);
3575   vzip(Neon8, mask, tmp);
3576   vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
3577   vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3578   vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
3579   vmov(NeonU16, dst.gp(), tmp.low(), 0);
3580 }
3581 
3582 void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
3583                                       LiftoffRegister rhs) {
3584   liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs);
3585 }
3586 
3587 void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
3588                                        int32_t rhs) {
3589   vshl(NeonS8, liftoff::GetSimd128Register(dst),
3590        liftoff::GetSimd128Register(lhs), rhs & 7);
3591 }
3592 
3593 void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
3594                                         LiftoffRegister lhs,
3595                                         LiftoffRegister rhs) {
3596   liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs);
3597 }
3598 
3599 void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
3600                                          LiftoffRegister lhs, int32_t rhs) {
3601   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs);
3602 }
3603 
3604 void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
3605                                         LiftoffRegister lhs,
3606                                         LiftoffRegister rhs) {
3607   liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs);
3608 }
3609 
3610 void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
3611                                          LiftoffRegister lhs, int32_t rhs) {
3612   liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs);
3613 }
3614 
3615 void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
3616                                       LiftoffRegister rhs) {
3617   vadd(Neon8, liftoff::GetSimd128Register(dst),
3618        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3619 }
3620 
3621 void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
3622                                             LiftoffRegister lhs,
3623                                             LiftoffRegister rhs) {
3624   vqadd(NeonS8, liftoff::GetSimd128Register(dst),
3625         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3626 }
3627 
3628 void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
3629                                       LiftoffRegister rhs) {
3630   vsub(Neon8, liftoff::GetSimd128Register(dst),
3631        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3632 }
3633 
3634 void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
3635                                             LiftoffRegister lhs,
3636                                             LiftoffRegister rhs) {
3637   vqsub(NeonS8, liftoff::GetSimd128Register(dst),
3638         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3639 }
3640 
3641 void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
3642                                             LiftoffRegister lhs,
3643                                             LiftoffRegister rhs) {
3644   vqsub(NeonU8, liftoff::GetSimd128Register(dst),
3645         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3646 }
3647 
3648 void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
3649                                             LiftoffRegister lhs,
3650                                             LiftoffRegister rhs) {
3651   vqadd(NeonU8, liftoff::GetSimd128Register(dst),
3652         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3653 }
3654 
3655 void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
3656                                         LiftoffRegister lhs,
3657                                         LiftoffRegister rhs) {
3658   vmin(NeonS8, liftoff::GetSimd128Register(dst),
3659        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3660 }
3661 
3662 void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
3663                                         LiftoffRegister lhs,
3664                                         LiftoffRegister rhs) {
3665   vmin(NeonU8, liftoff::GetSimd128Register(dst),
3666        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3667 }
3668 
3669 void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
3670                                         LiftoffRegister lhs,
3671                                         LiftoffRegister rhs) {
3672   vmax(NeonS8, liftoff::GetSimd128Register(dst),
3673        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3674 }
3675 
3676 void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
3677                                         LiftoffRegister lhs,
3678                                         LiftoffRegister rhs) {
3679   vmax(NeonU8, liftoff::GetSimd128Register(dst),
3680        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3681 }
3682 
3683 void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
3684                                      LiftoffRegister rhs) {
3685   vceq(Neon8, liftoff::GetSimd128Register(dst),
3686        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3687 }
3688 
3689 void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
3690                                      LiftoffRegister rhs) {
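  // NEON has no "not equal" compare, so compute equality and invert the bits.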
3691   vceq(Neon8, liftoff::GetSimd128Register(dst),
3692        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3693   vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3694 }
3695 
3696 void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3697                                        LiftoffRegister rhs) {
3698   vcgt(NeonS8, liftoff::GetSimd128Register(dst),
3699        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3700 }
3701 
3702 void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3703                                        LiftoffRegister rhs) {
3704   vcgt(NeonU8, liftoff::GetSimd128Register(dst),
3705        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3706 }
3707 
3708 void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3709                                        LiftoffRegister rhs) {
3710   vcge(NeonS8, liftoff::GetSimd128Register(dst),
3711        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3712 }
3713 
3714 void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3715                                        LiftoffRegister rhs) {
3716   vcge(NeonU8, liftoff::GetSimd128Register(dst),
3717        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3718 }
3719 
3720 void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
3721                                      LiftoffRegister rhs) {
3722   vceq(Neon16, liftoff::GetSimd128Register(dst),
3723        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3724 }
3725 
3726 void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
3727                                      LiftoffRegister rhs) {
3728   vceq(Neon16, liftoff::GetSimd128Register(dst),
3729        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3730   vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3731 }
3732 
3733 void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3734                                        LiftoffRegister rhs) {
3735   vcgt(NeonS16, liftoff::GetSimd128Register(dst),
3736        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3737 }
3738 
3739 void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3740                                        LiftoffRegister rhs) {
3741   vcgt(NeonU16, liftoff::GetSimd128Register(dst),
3742        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3743 }
3744 
3745 void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3746                                        LiftoffRegister rhs) {
3747   vcge(NeonS16, liftoff::GetSimd128Register(dst),
3748        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3749 }
3750 
3751 void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3752                                        LiftoffRegister rhs) {
3753   vcge(NeonU16, liftoff::GetSimd128Register(dst),
3754        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3755 }
3756 
3757 void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3758                                      LiftoffRegister rhs) {
3759   vceq(Neon32, liftoff::GetSimd128Register(dst),
3760        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3761 }
3762 
3763 void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3764                                      LiftoffRegister rhs) {
3765   vceq(Neon32, liftoff::GetSimd128Register(dst),
3766        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3767   vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3768 }
3769 
3770 void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3771                                        LiftoffRegister rhs) {
3772   vcgt(NeonS32, liftoff::GetSimd128Register(dst),
3773        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3774 }
3775 
3776 void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
3777                                        LiftoffRegister rhs) {
3778   vcgt(NeonU32, liftoff::GetSimd128Register(dst),
3779        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3780 }
3781 
3782 void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3783                                        LiftoffRegister rhs) {
3784   vcge(NeonS32, liftoff::GetSimd128Register(dst),
3785        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3786 }
3787 
3788 void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
3789                                        LiftoffRegister rhs) {
3790   vcge(NeonU32, liftoff::GetSimd128Register(dst),
3791        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
3792 }
3793 
3794 void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3795                                      LiftoffRegister rhs) {
3796   I64x2Eq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3797           liftoff::GetSimd128Register(rhs));
3798 }
3799 
3800 void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3801                                      LiftoffRegister rhs) {
3802   I64x2Ne(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3803           liftoff::GetSimd128Register(rhs));
3804 }
3805 
3806 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
3807                                        LiftoffRegister rhs) {
3808   I64x2GtS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3809            liftoff::GetSimd128Register(rhs));
3810 }
3811 
3812 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
3813                                        LiftoffRegister rhs) {
3814   I64x2GeS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3815            liftoff::GetSimd128Register(rhs));
3816 }
3817 
3818 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
3819                                      LiftoffRegister rhs) {
3820   vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3821        liftoff::GetSimd128Register(rhs));
3822 }
3823 
3824 void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
3825                                      LiftoffRegister rhs) {
3826   vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3827        liftoff::GetSimd128Register(rhs));
3828   vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
3829 }
3830 
3831 void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
3832                                      LiftoffRegister rhs) {
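  // Only "greater than" / "greater or equal" float compares are used here, so
  // lhs < rhs is emitted as rhs > lhs with the operands swapped.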
3833   vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
3834        liftoff::GetSimd128Register(lhs));
3835 }
3836 
3837 void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
3838                                      LiftoffRegister rhs) {
3839   vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
3840        liftoff::GetSimd128Register(lhs));
3841 }
3842 
3843 void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
3844                                      LiftoffRegister rhs) {
3845   liftoff::F64x2Compare(this, dst, lhs, rhs, eq);
3846 }
3847 
3848 void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
3849                                      LiftoffRegister rhs) {
3850   liftoff::F64x2Compare(this, dst, lhs, rhs, ne);
3851 }
3852 
3853 void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
3854                                      LiftoffRegister rhs) {
3855   liftoff::F64x2Compare(this, dst, lhs, rhs, lt);
3856 }
3857 
3858 void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
3859                                      LiftoffRegister rhs) {
3860   liftoff::F64x2Compare(this, dst, lhs, rhs, le);
3861 }
3862 
3863 void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
3864                                        const uint8_t imms[16]) {
3865   uint64_t vals[2];
3866   memcpy(vals, imms, sizeof(vals));
3867   vmov(dst.low_fp(), base::Double(vals[0]));
3868   vmov(dst.high_fp(), base::Double(vals[1]));
3869 }
3870 
3871 void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
3872   vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
3873 }
3874 
3875 void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
3876                                      LiftoffRegister rhs) {
3877   vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3878        liftoff::GetSimd128Register(rhs));
3879 }
3880 
3881 void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
3882                                     LiftoffRegister rhs) {
3883   vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3884        liftoff::GetSimd128Register(rhs));
3885 }
3886 
3887 void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
3888                                      LiftoffRegister rhs) {
3889   veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
3890        liftoff::GetSimd128Register(rhs));
3891 }
3892 
3893 void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
3894                                         LiftoffRegister src1,
3895                                         LiftoffRegister src2,
3896                                         LiftoffRegister mask) {
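  // vbsl selects bits from its two source operands according to the bits
  // already in the destination register, so the mask is moved into dst first
  // (unless dst already holds it).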
3897   if (dst != mask) {
3898     vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask));
3899   }
3900   vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1),
3901        liftoff::GetSimd128Register(src2));
3902 }
3903 
3904 void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
3905                                                  LiftoffRegister src) {
3906   vcvt_s32_f32(liftoff::GetSimd128Register(dst),
3907                liftoff::GetSimd128Register(src));
3908 }
3909 
3910 void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
3911                                                  LiftoffRegister src) {
3912   vcvt_u32_f32(liftoff::GetSimd128Register(dst),
3913                liftoff::GetSimd128Register(src));
3914 }
3915 
3916 void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
3917                                                  LiftoffRegister src) {
3918   vcvt_f32_s32(liftoff::GetSimd128Register(dst),
3919                liftoff::GetSimd128Register(src));
3920 }
3921 
3922 void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
3923                                                  LiftoffRegister src) {
3924   vcvt_f32_u32(liftoff::GetSimd128Register(dst),
3925                liftoff::GetSimd128Register(src));
3926 }
3927 
3928 void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
3929                                                     LiftoffRegister src) {
3930   LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
3931   vcvt_f32_f64(dst_d.low(), src.low_fp());
3932   vcvt_f32_f64(dst_d.high(), src.high_fp());
3933   vmov(dst.high_fp(), 0);
3934 }
3935 
3936 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
3937                                                  LiftoffRegister lhs,
3938                                                  LiftoffRegister rhs) {
3939   liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs);
3940 }
3941 
3942 void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
3943                                                  LiftoffRegister lhs,
3944                                                  LiftoffRegister rhs) {
3945   liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs);
3946 }
3947 
3948 void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
3949                                                  LiftoffRegister lhs,
3950                                                  LiftoffRegister rhs) {
3951   liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs);
3952 }
3953 
3954 void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
3955                                                  LiftoffRegister lhs,
3956                                                  LiftoffRegister rhs) {
3957   liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs);
3958 }
3959 
3960 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
3961                                                      LiftoffRegister src) {
3962   vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp());
3963 }
3964 
3965 void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
3966                                                       LiftoffRegister src) {
3967   vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp());
3968 }
3969 
3970 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
3971                                                      LiftoffRegister src) {
3972   vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp());
3973 }
3974 
3975 void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
3976                                                       LiftoffRegister src) {
3977   vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp());
3978 }
3979 
3980 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
3981                                                      LiftoffRegister src) {
3982   vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp());
3983 }
3984 
3985 void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
3986                                                       LiftoffRegister src) {
3987   vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp());
3988 }
3989 
3990 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
3991                                                      LiftoffRegister src) {
3992   vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp());
3993 }
3994 
3995 void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
3996                                                       LiftoffRegister src) {
3997   vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp());
3998 }
3999 
4000 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
4001                                                          LiftoffRegister src) {
4002   LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
4003   vcvt_s32_f64(dst_d.low(), src.low_fp());
4004   vcvt_s32_f64(dst_d.high(), src.high_fp());
4005   vmov(dst.high_fp(), 0);
4006 }
4007 
4008 void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
4009                                                          LiftoffRegister src) {
4010   LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
4011   vcvt_u32_f64(dst_d.low(), src.low_fp());
4012   vcvt_u32_f64(dst_d.high(), src.high_fp());
4013   vmov(dst.high_fp(), 0);
4014 }
4015 
4016 void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
4017                                          LiftoffRegister lhs,
4018                                          LiftoffRegister rhs) {
4019   vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
4020        liftoff::GetSimd128Register(rhs));
4021 }
4022 
4023 void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
4024                                                      LiftoffRegister lhs,
4025                                                      LiftoffRegister rhs) {
4026   vrhadd(NeonU8, liftoff::GetSimd128Register(dst),
4027          liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
4028 }
4029 
4030 void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
4031                                                      LiftoffRegister lhs,
4032                                                      LiftoffRegister rhs) {
4033   vrhadd(NeonU16, liftoff::GetSimd128Register(dst),
4034          liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
4035 }
4036 
4037 void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
4038                                       LiftoffRegister src) {
4039   vabs(Neon8, liftoff::GetSimd128Register(dst),
4040        liftoff::GetSimd128Register(src));
4041 }
4042 
4043 void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
4044                                       LiftoffRegister src) {
4045   vabs(Neon16, liftoff::GetSimd128Register(dst),
4046        liftoff::GetSimd128Register(src));
4047 }
4048 
4049 void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
4050                                       LiftoffRegister src) {
4051   vabs(Neon32, liftoff::GetSimd128Register(dst),
4052        liftoff::GetSimd128Register(src));
4053 }
4054 
4055 void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
4056                                       LiftoffRegister src) {
4057   I64x2Abs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
4058 }
4059 
4060 void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
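  // limit_address holds the address of the stack limit; load the limit itself,
  // then take the out-of-line path if sp is at or below it (unsigned "ls").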
4061   ldr(limit_address, MemOperand(limit_address));
4062   cmp(sp, limit_address);
4063   b(ool_code, ls);
4064 }
4065 
4066 void LiftoffAssembler::CallTrapCallbackForTesting() {
4067   PrepareCallCFunction(0, 0);
4068   CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
4069 }
4070 
4071 void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
4072   // Assert that this point in the wasm code is never reached.
4073   TurboAssembler::AssertUnreachable(reason);
4074 }
4075 
4076 void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
4077   RegList core_regs = regs.GetGpList();
4078   if (!core_regs.is_empty()) {
4079     stm(db_w, sp, core_regs);
4080   }
4081   LiftoffRegList fp_regs = regs & kFpCacheRegList;
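  // Store the FP registers in runs of consecutive D registers so that each
  // vstm covers one run; e.g. a set such as {d0, d1, d3} would be stored as
  // vstm {d0-d1} followed by vstm {d3}.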
4082   while (!fp_regs.is_empty()) {
4083     LiftoffRegister reg = fp_regs.GetFirstRegSet();
4084     DoubleRegister first = reg.fp();
4085     DoubleRegister last = first;
4086     fp_regs.clear(reg);
4087     while (!fp_regs.is_empty()) {
4088       LiftoffRegister reg = fp_regs.GetFirstRegSet();
4089       int code = reg.fp().code();
4090       // vstm cannot store more than 16 registers, so stop extending the run
4091       // once it would exceed that limit or the registers stop being consecutive.
4092       if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break;
4093       last = reg.fp();
4094       fp_regs.clear(reg);
4095     }
4096     vstm(db_w, sp, first, last);
4097   }
4098 }
4099 
4100 void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
4101   LiftoffRegList fp_regs = regs & kFpCacheRegList;
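  // Mirror image of PushRegisters: reload the runs of consecutive D registers
  // starting from the highest-numbered one, matching the push order above.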
4102   while (!fp_regs.is_empty()) {
4103     LiftoffRegister reg = fp_regs.GetLastRegSet();
4104     DoubleRegister last = reg.fp();
4105     DoubleRegister first = last;
4106     fp_regs.clear(reg);
4107     while (!fp_regs.is_empty()) {
4108       LiftoffRegister reg = fp_regs.GetLastRegSet();
4109       int code = reg.fp().code();
4110       if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break;
4111       first = reg.fp();
4112       fp_regs.clear(reg);
4113     }
4114     vldm(ia_w, sp, first, last);
4115   }
4116   RegList core_regs = regs.GetGpList();
4117   if (!core_regs.is_empty()) {
4118     ldm(ia_w, sp, core_regs);
4119   }
4120 }
4121 
4122 void LiftoffAssembler::RecordSpillsInSafepoint(
4123     SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
4124     LiftoffRegList ref_spills, int spill_offset) {
4125   int spill_space_size = 0;
4126   while (!all_spills.is_empty()) {
4127     LiftoffRegister reg = all_spills.GetLastRegSet();
4128     if (ref_spills.has(reg)) {
4129       safepoint.DefineTaggedStackSlot(spill_offset);
4130     }
4131     all_spills.clear(reg);
4132     ++spill_offset;
4133     spill_space_size += kSystemPointerSize;
4134   }
4135   // Record the number of additional spill slots.
4136   RecordOolSpillSpaceSize(spill_space_size);
4137 }
4138 
4139 void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
4140   Drop(num_stack_slots);
4141   Ret();
4142 }
4143 
4144 void LiftoffAssembler::CallC(const ValueKindSig* sig,
4145                              const LiftoffRegister* args,
4146                              const LiftoffRegister* rets,
4147                              ValueKind out_argument_kind, int stack_bytes,
4148                              ExternalReference ext_ref) {
4149   // Arguments are passed by pushing them all to the stack and then passing
4150   // a pointer to them.
4151   DCHECK(IsAligned(stack_bytes, kSystemPointerSize));
4152   // Reserve space in the stack.
4153   AllocateStackSpace(stack_bytes);
4154 
4155   int arg_bytes = 0;
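  // Store each argument at its running offset in the reserved buffer; i64
  // values take two pointer-sized slots and s128 values take four.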
4156   for (ValueKind param_kind : sig->parameters()) {
4157     switch (param_kind) {
4158       case kI32:
4159         str(args->gp(), MemOperand(sp, arg_bytes));
4160         break;
4161       case kI64:
4162         str(args->low_gp(), MemOperand(sp, arg_bytes));
4163         str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize));
4164         break;
4165       case kF32:
4166         vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes));
4167         break;
4168       case kF64:
4169         vstr(args->fp(), MemOperand(sp, arg_bytes));
4170         break;
4171       case kS128:
4172         vstr(args->low_fp(), MemOperand(sp, arg_bytes));
4173         vstr(args->high_fp(),
4174              MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
4175         break;
4176       default:
4177         UNREACHABLE();
4178     }
4179     args++;
4180     arg_bytes += value_kind_size(param_kind);
4181   }
4182   DCHECK_LE(arg_bytes, stack_bytes);
4183 
4184   // Pass a pointer to the buffer with the arguments to the C function.
4185   mov(r0, sp);
4186 
4187   // Now call the C function.
4188   constexpr int kNumCCallArgs = 1;
4189   PrepareCallCFunction(kNumCCallArgs);
4190   CallCFunction(ext_ref, kNumCCallArgs);
4191 
4192   // Move return value to the right register.
4193   const LiftoffRegister* result_reg = rets;
4194   if (sig->return_count() > 0) {
4195     DCHECK_EQ(1, sig->return_count());
4196     constexpr Register kReturnReg = r0;
4197     if (kReturnReg != rets->gp()) {
4198       Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0));
4199     }
4200     result_reg++;
4201   }
4202 
4203   // Load potential output value from the buffer on the stack.
4204   if (out_argument_kind != kVoid) {
4205     switch (out_argument_kind) {
4206       case kI32:
4207         ldr(result_reg->gp(), MemOperand(sp));
4208         break;
4209       case kI64:
4210         ldr(result_reg->low_gp(), MemOperand(sp));
4211         ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize));
4212         break;
4213       case kF32:
4214         vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp));
4215         break;
4216       case kF64:
4217         vldr(result_reg->fp(), MemOperand(sp));
4218         break;
4219       case kS128:
4220         vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
4221              NeonMemOperand(sp));
4222         break;
4223       default:
4224         UNREACHABLE();
4225     }
4226   }
4227   add(sp, sp, Operand(stack_bytes));
4228 }
4229 
4230 void LiftoffAssembler::CallNativeWasmCode(Address addr) {
4231   Call(addr, RelocInfo::WASM_CALL);
4232 }
4233 
4234 void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
4235   Jump(addr, RelocInfo::WASM_CALL);
4236 }
4237 
4238 void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
4239                                     compiler::CallDescriptor* call_descriptor,
4240                                     Register target) {
4241   DCHECK(target != no_reg);
4242   Call(target);
4243 }
4244 
4245 void LiftoffAssembler::TailCallIndirect(Register target) {
4246   DCHECK(target != no_reg);
4247   Jump(target);
4248 }
4249 
4250 void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
4251   // A direct call to a wasm runtime stub defined in this module.
4252   // Just encode the stub index. This will be patched at relocation.
4253   Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
4254 }
4255 
4256 void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
4257   AllocateStackSpace(size);
4258   mov(addr, sp);
4259 }
4260 
4261 void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
4262   add(sp, sp, Operand(size));
4263 }
4264 
4265 void LiftoffAssembler::MaybeOSR() {}
4266 
4267 void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
4268                                        ValueKind kind) {
4269   if (kind == kF32) {
4270     FloatRegister src_f = liftoff::GetFloatRegister(src);
4271     VFPCompareAndSetFlags(src_f, src_f);
4272   } else {
4273     DCHECK_EQ(kind, kF64);
4274     VFPCompareAndSetFlags(src, src);
4275   }
4276 
4277   // Store a non-zero value if src is NaN.
4278   str(dst, MemOperand(dst), ne);  // x != x iff isnan(x)
4279 }
4280 
4281 void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
4282                                             Register tmp_gp,
4283                                             LiftoffRegister tmp_s128,
4284                                             ValueKind lane_kind) {
4285   QwNeonRegister src_q = liftoff::GetSimd128Register(src);
4286   QwNeonRegister tmp_q = liftoff::GetSimd128Register(tmp_s128);
4287   if (lane_kind == kF32) {
4288     vpadd(tmp_q.low(), src_q.low(), src_q.high());
4289     LowDwVfpRegister tmp_d =
4290         LowDwVfpRegister::from_code(tmp_s128.low_fp().code());
4291     vadd(tmp_d.low(), tmp_d.low(), tmp_d.high());
4292   } else {
4293     DCHECK_EQ(lane_kind, kF64);
4294     vadd(tmp_q.low(), src_q.low(), src_q.high());
4295   }
4296   emit_set_if_nan(dst, tmp_q.low(), lane_kind);
4297 }
4298 
4299 void LiftoffStackSlots::Construct(int param_slots) {
4300   DCHECK_LT(0, slots_.size());
4301   SortInPushOrder();
4302   int last_stack_slot = param_slots;
4303   for (auto& slot : slots_) {
4304     const int stack_slot = slot.dst_slot_;
4305     int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
4306     DCHECK_LT(0, stack_decrement);
4307     last_stack_slot = stack_slot;
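    // First allocate the gap down to this slot, then push the value itself, so
    // that it lands exactly at its destination parameter slot.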
4308     const LiftoffAssembler::VarState& src = slot.src_;
4309     switch (src.loc()) {
4310       case LiftoffAssembler::VarState::kStack: {
4311         switch (src.kind()) {
4312           // i32 and i64 are handled alike here: an i64 has already been
4313           // split into two i32 half-slots, so each case pushes one word.
4314           case kI32:
4315           case kI64:
4316           case kF32:
4317           case kRef:
4318           case kOptRef: {
4319             asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
4320             UseScratchRegisterScope temps(asm_);
4321             Register scratch = temps.Acquire();
4322             asm_->ldr(scratch,
4323                       liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
4324             asm_->Push(scratch);
4325           } break;
4326           case kF64: {
4327             asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
4328             UseScratchRegisterScope temps(asm_);
4329             DwVfpRegister scratch = temps.AcquireD();
4330             asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
4331             asm_->vpush(scratch);
4332           } break;
4333           case kS128: {
4334             asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
4335             MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
4336             UseScratchRegisterScope temps(asm_);
4337             Register addr = liftoff::CalculateActualAddress(
4338                 asm_, &temps, mem_op.rn(), no_reg, mem_op.offset());
4339             QwNeonRegister scratch = temps.AcquireQ();
4340             asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr));
4341             asm_->vpush(scratch);
4342             break;
4343           }
4344           default:
4345             UNREACHABLE();
4346         }
4347         break;
4348       }
4349       case LiftoffAssembler::VarState::kRegister: {
4350         int pushed_bytes = SlotSizeInBytes(slot);
4351         asm_->AllocateStackSpace(stack_decrement - pushed_bytes);
4352         switch (src.kind()) {
4353           case kI64: {
4354             LiftoffRegister reg =
4355                 slot.half_ == kLowWord ? src.reg().low() : src.reg().high();
4356             asm_->push(reg.gp());
4357           } break;
4358           case kI32:
4359           case kRef:
4360           case kOptRef:
4361             asm_->push(src.reg().gp());
4362             break;
4363           case kF32:
4364             asm_->vpush(liftoff::GetFloatRegister(src.reg().fp()));
4365             break;
4366           case kF64:
4367             asm_->vpush(src.reg().fp());
4368             break;
4369           case kS128:
4370             asm_->vpush(liftoff::GetSimd128Register(src.reg()));
4371             break;
4372           default:
4373             UNREACHABLE();
4374         }
4375         break;
4376       }
4377       case LiftoffAssembler::VarState::kIntConst: {
4378         asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
4379         DCHECK(src.kind() == kI32 || src.kind() == kI64);
4380         UseScratchRegisterScope temps(asm_);
4381         Register scratch = temps.Acquire();
4382         // The high word is the sign extension of the low word.
4383         asm_->mov(scratch,
4384                   Operand(slot.half_ == kLowWord ? src.i32_const()
4385                                                  : src.i32_const() >> 31));
4386         asm_->push(scratch);
4387         break;
4388       }
4389     }
4390   }
4391 }
4392 
4393 }  // namespace wasm
4394 }  // namespace internal
4395 }  // namespace v8
4396 
4397 #endif  // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
4398