// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
#define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_

#include "src/base/platform/wrappers.h"
#include "src/codegen/arm/register-arm.h"
#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

inline constexpr Condition ToCondition(LiftoffCondition liftoff_cond) {
  switch (liftoff_cond) {
    case kEqual:
      return eq;
    case kUnequal:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedLessEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kSignedGreaterEqual:
      return ge;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedLessEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kUnsignedGreaterEqual:
      return hs;
  }
}

//  half
//  slot        Frame
//  -----+--------------------+---------------------------
//  n+3  |   parameter n      |
//  ...  |       ...          |
//   4   |   parameter 1      | or parameter 2
//   3   |   parameter 0      | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | StackFrame::WASM   |
//  -2   |    instance        |
//  -3   |    feedback vector |
//  -4   |    tiering budget  |
//  -----+--------------------+---------------------------
//  -5   |    slot 0 (high)   |   ^
//  -6   |    slot 0 (low)    |   |
//  -7   |    slot 1 (high)   | Frame slots
//  -8   |    slot 1 (low)    |   |
//       |                    |   v
//  -----+--------------------+  <-- stack ptr (sp)
//
static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
              "Slot size should be twice the size of the 32 bit pointer.");
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
constexpr int kFeedbackVectorOffset = 3 * kSystemPointerSize;
constexpr int kTierupBudgetOffset = 4 * kSystemPointerSize;
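// With kSystemPointerSize == 4 on 32-bit ARM, these constants resolve to 8,
// 12, and 16 bytes below fp, matching the frame layout above.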
// kPatchInstructionsRequired sets an upper limit on how many instructions
// PatchPrepareStackFrame may use to grow the stack appropriately.
// Subtracting a large constant takes three instructions: movw + movt + sub.
constexpr int32_t kPatchInstructionsRequired = 3;
constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;

inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

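// Addresses one 32-bit half of an i64 stack slot: the low word sits at the
// slot's base offset, the high word kHalfStackSlotSize (4 bytes) above it.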
inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
  int32_t half_offset =
      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
  return MemOperand(offset > 0 ? fp : sp, -offset + half_offset);
}

inline MemOperand GetInstanceOperand() { return GetStackSlot(kInstanceOffset); }

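// For example (register names purely illustrative), GetMemOp(assm, &temps, r1,
// r2, 16) materializes {r2 + 16} in a scratch register and returns a
// register-offset MemOperand; with a zero immediate it returns
// MemOperand(r1, r2) directly, and with no offset register MemOperand(r1, 16).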
inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, int32_t offset_imm) {
  if (offset != no_reg) {
    if (offset_imm == 0) return MemOperand(addr, offset);
    Register tmp = temps->Acquire();
    assm->add(tmp, offset, Operand(offset_imm));
    return MemOperand(addr, tmp);
  }
  return MemOperand(addr, offset_imm);
}
102
103 inline Register CalculateActualAddress(LiftoffAssembler* assm,
104 UseScratchRegisterScope* temps,
105 Register addr_reg, Register offset_reg,
106 uintptr_t offset_imm,
107 Register result_reg = no_reg) {
108 if (offset_reg == no_reg && offset_imm == 0) {
109 if (result_reg == no_reg) {
110 return addr_reg;
111 } else {
112 assm->mov(result_reg, addr_reg);
113 return result_reg;
114 }
115 }
116 Register actual_addr_reg =
117 result_reg != no_reg ? result_reg : temps->Acquire();
118 if (offset_reg == no_reg) {
119 assm->add(actual_addr_reg, addr_reg, Operand(offset_imm));
120 } else {
121 assm->add(actual_addr_reg, addr_reg, Operand(offset_reg));
122 if (offset_imm != 0) {
123 assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm));
124 }
125 }
126 return actual_addr_reg;
127 }
128
MakeUnsigned(LiftoffCondition cond)129 inline LiftoffCondition MakeUnsigned(LiftoffCondition cond) {
130 switch (cond) {
131 case kSignedLessThan:
132 return kUnsignedLessThan;
133 case kSignedLessEqual:
134 return kUnsignedLessEqual;
135 case kSignedGreaterThan:
136 return kUnsignedGreaterThan;
137 case kSignedGreaterEqual:
138 return kUnsignedGreaterEqual;
139 case kEqual:
140 case kUnequal:
141 case kUnsignedLessThan:
142 case kUnsignedLessEqual:
143 case kUnsignedGreaterThan:
144 case kUnsignedGreaterEqual:
145 return cond;
146 default:
147 UNREACHABLE();
148 }
149 }
150
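// Example instantiation: I64Binop<&Assembler::add, &Assembler::adc> (as used
// for the 64-bit atomic add below) emits "adds" on the low words followed by
// "adc" on the high words, so the carry propagates between the two halves.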
template <void (Assembler::*op)(Register, Register, Register, SBit, Condition),
          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
                                            SBit, Condition)>
inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst,
                     LiftoffRegister lhs, LiftoffRegister rhs) {
  Register dst_low = dst.low_gp();
  if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) {
    dst_low =
        assm->GetUnusedRegister(kGpReg, LiftoffRegList{lhs, rhs, dst.high_gp()})
            .gp();
  }
  (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al);
  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()),
                         LeaveCC, al);
  if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low);
}

template <void (Assembler::*op)(Register, Register, const Operand&, SBit,
                                Condition),
          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
                                            SBit, Condition)>
inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst,
                      LiftoffRegister lhs, int64_t imm) {
  // The compiler allocated registers such that either {dst == lhs} or there is
  // no overlap between the two.
  DCHECK_NE(dst.low_gp(), lhs.high_gp());
  int32_t imm_low_word = static_cast<int32_t>(imm);
  int32_t imm_high_word = static_cast<int32_t>(imm >> 32);
  (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm_low_word), SetCC, al);
  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(imm_high_word),
                         LeaveCC, al);
}

template <void (TurboAssembler::*op)(Register, Register, Register, Register,
                                     Register),
          bool is_left_shift>
inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst,
                       LiftoffRegister src, Register amount) {
  Register src_low = src.low_gp();
  Register src_high = src.high_gp();
  Register dst_low = dst.low_gp();
  Register dst_high = dst.high_gp();
  // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low}
  // then {dst_high}.
  Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low;
  LiftoffRegList pinned = {clobbered_dst_reg, src};
  Register amount_capped =
      pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp();
  assm->and_(amount_capped, amount, Operand(0x3F));

  // Ensure that writing the first half of {dst} does not overwrite the still
  // needed half of {src}.
  Register* later_src_reg = is_left_shift ? &src_low : &src_high;
  if (*later_src_reg == clobbered_dst_reg) {
    *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
    assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg);
  }

  (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped);
}

inline FloatRegister GetFloatRegister(DoubleRegister reg) {
  DCHECK_LT(reg.code(), kDoubleCode_d16);
  return LowDwVfpRegister::from_code(reg.code()).low();
}

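// A NEON Q register aliases a pair of consecutive D registers (q<n> covers
// d<2n> and d<2n+1>), hence the division by two below.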
inline Simd128Register GetSimd128Register(DoubleRegister reg) {
  return QwNeonRegister::from_code(reg.code() / 2);
}

inline Simd128Register GetSimd128Register(LiftoffRegister reg) {
  return liftoff::GetSimd128Register(reg.low_fp());
}

enum class MinOrMax : uint8_t { kMin, kMax };
template <typename RegisterType>
inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst,
                              RegisterType lhs, RegisterType rhs,
                              MinOrMax min_or_max) {
  DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8);
  if (lhs == rhs) {
    assm->TurboAssembler::Move(dst, lhs);
    return;
  }
  Label done, is_nan;
  if (min_or_max == MinOrMax::kMin) {
    assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan);
  } else {
    assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan);
  }
  assm->b(&done);
  assm->bind(&is_nan);
  // Create a NaN output.
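  // (vadd produces a NaN whenever at least one of its inputs is NaN, which
  // serves as the NaN result here.)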
  assm->vadd(dst, lhs, rhs);
  assm->bind(&done);
}

inline Register EnsureNoAlias(Assembler* assm, Register reg,
                              Register must_not_alias,
                              UseScratchRegisterScope* temps) {
  if (reg != must_not_alias) return reg;
  Register tmp = temps->Acquire();
  DCHECK_NE(reg, tmp);
  assm->mov(tmp, reg);
  return tmp;
}

inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt,
                         NeonDataType sdt, LiftoffRegister dst,
                         LiftoffRegister lhs, LiftoffRegister rhs) {
  if (dst == lhs) {
    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
  } else {
    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
  }
}

inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst,
                         LiftoffRegister lhs, LiftoffRegister rhs,
                         Condition cond) {
  DCHECK(cond == eq || cond == ne || cond == lt || cond == le);

  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
  UseScratchRegisterScope temps(assm);
  Register scratch = temps.Acquire();

  assm->mov(scratch, Operand(0));
  assm->VFPCompareAndSetFlags(left.low(), right.low());
  assm->mov(scratch, Operand(-1), LeaveCC, cond);
  if (cond == lt || cond == le) {
    // Check for NaN.
    assm->mov(scratch, Operand(0), LeaveCC, vs);
  }
  assm->vmov(dest.low(), scratch, scratch);

  assm->mov(scratch, Operand(0));
  assm->VFPCompareAndSetFlags(left.high(), right.high());
  assm->mov(scratch, Operand(-1), LeaveCC, cond);
  if (cond == lt || cond == le) {
    // Check for NaN.
    assm->mov(scratch, Operand(0), LeaveCC, vs);
  }
  assm->vmov(dest.high(), scratch, scratch);
}

inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst,
                  ValueKind kind) {
#ifdef DEBUG
  // The {str} instruction needs a temp register when the immediate in the
  // provided MemOperand does not fit into 12 bits. This happens for large
  // stack frames. This DCHECK checks that the temp register is available when
  // needed.
  DCHECK(UseScratchRegisterScope{assm}.CanAcquire());
#endif
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->str(src.gp(), dst);
      break;
    case kI64:
      // Positive offsets should be lowered to kI32.
      assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset()));
      assm->str(
          src.high_gp(),
          MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize));
      break;
    case kF32:
      assm->vstr(liftoff::GetFloatRegister(src.fp()), dst);
      break;
    case kF64:
      assm->vstr(src.fp(), dst);
      break;
    case kS128: {
      UseScratchRegisterScope temps(assm);
      Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(),
                                                      no_reg, dst.offset());
      assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr));
      break;
    }
    default:
      UNREACHABLE();
  }
}

inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
                 ValueKind kind) {
  switch (kind) {
    case kI32:
    case kOptRef:
    case kRef:
    case kRtt:
      assm->ldr(dst.gp(), src);
      break;
    case kI64:
      assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset()));
      assm->ldr(
          dst.high_gp(),
          MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize));
      break;
    case kF32:
      assm->vldr(liftoff::GetFloatRegister(dst.fp()), src);
      break;
    case kF64:
      assm->vldr(dst.fp(), src);
      break;
    case kS128: {
      // Get memory address of slot to fill from.
      UseScratchRegisterScope temps(assm);
      Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(),
                                                      no_reg, src.offset());
      assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
      break;
    }
    default:
      UNREACHABLE();
  }
}

constexpr int MaskFromNeonDataType(NeonDataType dt) {
  switch (dt) {
    case NeonS8:
    case NeonU8:
      return 7;
    case NeonS16:
    case NeonU16:
      return 15;
    case NeonS32:
    case NeonU32:
      return 31;
    case NeonS64:
    case NeonU64:
      return 63;
  }
}

enum ShiftDirection { kLeft, kRight };

template <ShiftDirection dir = kLeft, NeonDataType dt, NeonSize sz>
inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
                          LiftoffRegister lhs, LiftoffRegister rhs) {
  constexpr int mask = MaskFromNeonDataType(dt);
  UseScratchRegisterScope temps(assm);
  QwNeonRegister tmp = temps.AcquireQ();
  Register shift = temps.Acquire();
  assm->and_(shift, rhs.gp(), Operand(mask));
  assm->vdup(sz, tmp, shift);
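  // vshl shifts right when given a negative per-lane count, so right shifts
  // are implemented by negating the duplicated shift amounts.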
  if (dir == kRight) {
    assm->vneg(sz, tmp, tmp);
  }
  assm->vshl(dt, liftoff::GetSimd128Register(dst),
             liftoff::GetSimd128Register(lhs), tmp);
}

template <ShiftDirection dir, NeonDataType dt>
inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
                                   LiftoffRegister lhs, int32_t rhs) {
  // vshr by 0 is not allowed, so check for it, and only move if dst != lhs.
  int32_t shift = rhs & MaskFromNeonDataType(dt);
  if (shift) {
    if (dir == kLeft) {
      assm->vshl(dt, liftoff::GetSimd128Register(dst),
                 liftoff::GetSimd128Register(lhs), shift);
    } else {
      assm->vshr(dt, liftoff::GetSimd128Register(dst),
                 liftoff::GetSimd128Register(lhs), shift);
    }
  } else if (dst != lhs) {
    assm->vmov(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(lhs));
  }
}

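// Sets {dst} to 1 if any 32-bit lane of {src} is non-zero and to 0 otherwise,
// by reducing the lanes with pairwise unsigned maxima and testing the result.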
inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  UseScratchRegisterScope temps(assm);
  DwVfpRegister scratch = temps.AcquireD();
  assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
  assm->vpmax(NeonU32, scratch, scratch, scratch);
  assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
  assm->cmp(dst.gp(), Operand(0));
  assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
}

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  if (!CpuFeatures::IsSupported(ARMv7)) {
    bailout(kUnsupportedArchitecture, "Liftoff needs ARMv7");
    return 0;
  }
  uint32_t offset = static_cast<uint32_t>(pc_offset());
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately. Additional nops are required as the bytes operand might
  // require extra moves to encode.
  for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) {
    nop();
  }
  DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize,
            pc_offset());
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();

  // Push the return address and frame pointer to complete the stack frame.
  sub(sp, sp, Operand(8));
  ldr(scratch, MemOperand(fp, 4));
  str(scratch, MemOperand(sp, 4));
  ldr(scratch, MemOperand(fp, 0));
  str(scratch, MemOperand(sp, 0));

  // Shift the whole frame upwards.
  int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count - 1; i >= 0; --i) {
    ldr(scratch, MemOperand(sp, i * 4));
    str(scratch, MemOperand(fp, (i - stack_param_delta) * 4));
  }

  // Set the new stack and frame pointer.
  sub(sp, fp, Operand(stack_param_delta * 4));
  Pop(lr, fp);
}

void LiftoffAssembler::AlignFrameSize() {}

void LiftoffAssembler::PatchPrepareStackFrame(
    int offset, SafepointTableBuilder* safepoint_table_builder) {
  // The frame_size includes the frame marker and the instance slot. Both are
  // pushed as part of frame construction, so we don't need to allocate memory
  // for them anymore.
  int frame_size = GetTotalFrameSize() - 2 * kSystemPointerSize;

  PatchingAssembler patching_assembler(AssemblerOptions{},
                                       buffer_start_ + offset,
                                       liftoff::kPatchInstructionsRequired);
  if (V8_LIKELY(frame_size < 4 * KB)) {
    // This is the standard case for small frames: just subtract from SP and be
    // done with it.
    patching_assembler.sub(sp, sp, Operand(frame_size));
    patching_assembler.PadWithNops();
    return;
  }

  // The frame size is bigger than 4KB, so we might overflow the available
  // stack space if we first allocate the frame and then do the stack check
  // (we will need some remaining stack space for throwing the exception).
  // That's why we check the available stack space before we allocate the
  // frame. To do this we replace the {__ sub(sp, sp, framesize)} with a jump
  // to OOL code that does this "extended stack check".
  //
  // The OOL code can simply be generated here with the normal assembler,
  // because all other code generation, including OOL code, has already
  // finished when {PatchPrepareStackFrame} is called. The function prologue
  // then jumps to the current {pc_offset()} to execute the OOL code for
  // allocating the large frame.

  // Emit the unconditional branch in the function prologue (from {offset} to
  // {pc_offset()}).
  patching_assembler.b(pc_offset() - offset - Instruction::kPcLoadDelta);
  patching_assembler.PadWithNops();

  // If the frame is bigger than the stack, we throw the stack overflow
  // exception unconditionally. Thereby we can avoid the integer overflow
  // check in the condition code.
  RecordComment("OOL: stack check for large frame");
  Label continuation;
  if (frame_size < FLAG_stack_size * 1024) {
    UseScratchRegisterScope temps(this);
    Register stack_limit = temps.Acquire();
    ldr(stack_limit,
        FieldMemOperand(kWasmInstanceRegister,
                        WasmInstanceObject::kRealStackLimitAddressOffset));
    ldr(stack_limit, MemOperand(stack_limit));
    add(stack_limit, stack_limit, Operand(frame_size));
    cmp(sp, stack_limit);
    b(cs /* higher or same */, &continuation);
  }

  Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
  // The call will not return; just define an empty safepoint.
  safepoint_table_builder->DefineSafepoint(this);
  if (FLAG_debug_code) stop();

  bind(&continuation);

  // Now allocate the stack space. Note that this might do more than just
  // decrementing the SP; consult {TurboAssembler::AllocateStackSpace}.
  AllocateStackSpace(frame_size);

  // Jump back to the start of the function, from {pc_offset()} to
  // right after the reserved space for the {__ sub(sp, sp, framesize)} (which
  // is a branch now).
  int func_start_offset =
      offset + liftoff::kPatchInstructionsRequired * kInstrSize;
  b(func_start_offset - pc_offset() - Instruction::kPcLoadDelta);
}

void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); }

void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kTierupBudgetOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueKind kind) {
  switch (kind) {
    case kS128:
      return value_kind_size(kind);
    default:
      return kStackSlotSize;
  }
}

bool LiftoffAssembler::NeedsAlignment(ValueKind kind) {
  return kind == kS128 || is_reference(kind);
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case kI32:
      TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode));
      break;
    case kI64: {
      DCHECK(RelocInfo::IsNoInfo(rmode));
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      TurboAssembler::Move(reg.low_gp(), Operand(low_word));
      TurboAssembler::Move(reg.high_gp(), Operand(high_word));
      break;
    }
    case kF32:
      vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
      break;
    case kF64: {
      Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
      vmov(reg.fp(), base::Double(value.to_f64_boxed().get_bits()),
           extra_scratch);
      break;
    }
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadInstanceFromFrame(Register dst) {
  ldr(dst, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::LoadFromInstance(Register dst, Register instance,
                                        int offset, int size) {
  DCHECK_LE(0, offset);
  MemOperand src{instance, offset};
  switch (size) {
    case 1:
      ldrb(dst, src);
      break;
    case 4:
      ldr(dst, src);
      break;
    default:
      UNIMPLEMENTED();
  }
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     Register instance,
                                                     int offset) {
  STATIC_ASSERT(kTaggedSize == kSystemPointerSize);
  ldr(dst, MemOperand{instance, offset});
}

void LiftoffAssembler::SpillInstance(Register instance) {
  str(instance, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::ResetOSRTarget() {}

namespace liftoff {
#define __ lasm->
inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst,
                         Register src_addr, Register offset_reg,
                         int32_t offset_imm, LoadType type,
                         LiftoffRegList pinned,
                         uint32_t* protected_load_pc = nullptr,
                         bool is_load_mem = false) {
  DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair());
  UseScratchRegisterScope temps(lasm);
  if (type.value() == LoadType::kF64Load ||
      type.value() == LoadType::kF32Load ||
      type.value() == LoadType::kS128Load) {
    Register actual_src_addr = liftoff::CalculateActualAddress(
        lasm, &temps, src_addr, offset_reg, offset_imm);
    if (type.value() == LoadType::kF64Load) {
      // Armv6 is not supported so Neon can be used to avoid alignment issues.
      CpuFeatureScope scope(lasm, NEON);
      __ vld1(Neon64, NeonListOperand(dst.fp()),
              NeonMemOperand(actual_src_addr));
    } else if (type.value() == LoadType::kF32Load) {
      // TODO(arm): Use vld1 for f32 when implemented in simulator as used for
      // f64. It supports unaligned access.
      Register scratch =
          (actual_src_addr == src_addr) ? temps.Acquire() : actual_src_addr;
      __ ldr(scratch, MemOperand(actual_src_addr));
      __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch);
    } else {
      // Armv6 is not supported so Neon can be used to avoid alignment issues.
      CpuFeatureScope scope(lasm, NEON);
      __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2),
              NeonMemOperand(actual_src_addr));
    }
  } else {
    MemOperand src_op =
        liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm);
    if (protected_load_pc) *protected_load_pc = __ pc_offset();
    switch (type.value()) {
      case LoadType::kI32Load8U:
        __ ldrb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8U:
        __ ldrb(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load8S:
        __ ldrsb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8S:
        __ ldrsb(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI32Load16U:
        __ ldrh(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16U:
        __ ldrh(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load16S:
        __ ldrsh(dst.gp(), src_op);
        break;
      case LoadType::kI32Load:
        __ ldr(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16S:
        __ ldrsh(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load32U:
        __ ldr(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI64Load32S:
        __ ldr(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load:
        __ ldr(dst.low_gp(), src_op);
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
        if (temps.CanAcquire()) {
          src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize));
        }
        __ ldr(dst.high_gp(), src_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg,
                        offset_imm, LoadType::kI32Load, pinned);
}

void LiftoffAssembler::LoadFullPointer(Register dst, Register src_addr,
                                       int32_t offset_imm) {
  UseScratchRegisterScope temps(this);
  MemOperand src_op =
      liftoff::GetMemOp(this, &temps, src_addr, no_reg, offset_imm);
  ldr(dst, src_op);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          Register offset_reg,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned,
                                          SkipWriteBarrier skip_write_barrier) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  Register actual_offset_reg = offset_reg;
  if (offset_reg != no_reg && offset_imm != 0) {
    if (cache_state()->is_used(LiftoffRegister(offset_reg))) {
      actual_offset_reg = GetUnusedRegister(kGpReg, pinned).gp();
    }
    add(actual_offset_reg, offset_reg, Operand(offset_imm));
  }
  MemOperand dst_op = actual_offset_reg == no_reg
                          ? MemOperand(dst_addr, offset_imm)
                          : MemOperand(dst_addr, actual_offset_reg);
  str(src.gp(), dst_op);

  if (skip_write_barrier || FLAG_disable_write_barriers) return;

  // The write barrier.
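  // Call the record-write stub only if the destination page keeps track of
  // outgoing pointers, the stored value is a heap object (not a Smi), and the
  // value's page keeps track of incoming pointers; otherwise skip to {exit}.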
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne,
                &write_barrier);
  b(&exit);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit);
  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq,
                &exit);
  CallRecordWriteStubSaveRegisters(
      dst_addr,
      actual_offset_reg == no_reg ? Operand(offset_imm)
                                  : Operand(actual_offset_reg),
      RememberedSetAction::kEmit, SaveFPRegsMode::kSave,
      StubCallMode::kCallWasmRuntimeStub);
  bind(&exit);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem,
                            bool i64_offset) {
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  liftoff::LoadInternal(this, dst, src_addr, offset_reg,
                        static_cast<int32_t>(offset_imm), type, pinned,
                        protected_load_pc, is_load_mem);
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  // Offsets >=2GB are statically OOB on 32-bit systems.
  DCHECK_LE(offset_imm, std::numeric_limits<int32_t>::max());
  UseScratchRegisterScope temps(this);
  if (type.value() == StoreType::kF64Store) {
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    // Armv6 is not supported so Neon can be used to avoid alignment issues.
    CpuFeatureScope scope(this, NEON);
    vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr));
  } else if (type.value() == StoreType::kS128Store) {
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    // Armv6 is not supported so Neon can be used to avoid alignment issues.
    CpuFeatureScope scope(this, NEON);
    vst1(Neon8, NeonListOperand(src.low_fp(), 2),
         NeonMemOperand(actual_dst_addr));
  } else if (type.value() == StoreType::kF32Store) {
    // TODO(arm): Use vst1 for f32 when implemented in simulator as used for
    // f64. It supports unaligned access.
    // CalculateActualAddress needs no scratch register only if the following
    // condition holds; otherwise another register must be obtained here.
    Register scratch = (offset_reg == no_reg && offset_imm == 0)
                           ? temps.Acquire()
                           : GetUnusedRegister(kGpReg, pinned).gp();
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    vmov(scratch, liftoff::GetFloatRegister(src.fp()));
    str(scratch, MemOperand(actual_dst_addr));
  } else {
    MemOperand dst_op =
        liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
    if (protected_store_pc) *protected_store_pc = pc_offset();
    switch (type.value()) {
      case StoreType::kI64Store8:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store8:
        strb(src.gp(), dst_op);
        break;
      case StoreType::kI64Store16:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store16:
        strh(src.gp(), dst_op);
        break;
      case StoreType::kI64Store32:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store:
        str(src.gp(), dst_op);
        break;
      case StoreType::kI64Store:
        str(src.low_gp(), dst_op);
        // GetMemOp may use a scratch register as the offset register, in
        // which case calling GetMemOp again would fail because the assembler
        // has run out of scratch registers.
        if (temps.CanAcquire()) {
          dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize));
        }
        str(src.high_gp(), dst_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicOp32(
    LiftoffAssembler* lasm, Register dst_addr, Register offset_reg,
    uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result,
    LiftoffRegList pinned,
    void (Assembler::*load)(Register, Register, Condition),
    void (Assembler::*store)(Register, Register, Register, Condition),
    void (*op)(LiftoffAssembler*, Register, Register, Register)) {
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to
  // be the same register.
  Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // {LiftoffCompiler::AtomicBinop} ensures that {result} is unique.
  DCHECK(result.gp() != value.gp() && result.gp() != dst_addr &&
         result.gp() != offset_reg);

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, &temps, dst_addr, offset_reg, offset_imm);

  __ dmb(ISH);
  Label retry;
  __ bind(&retry);
  (lasm->*load)(result.gp(), actual_addr, al);
  op(lasm, temp, result.gp(), value.gp());
  (lasm->*store)(store_result, temp, actual_addr, al);
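  // The strex-family store writes 0 to {store_result} on success and 1 if the
  // exclusive access was lost, in which case the whole read-modify-write is
  // retried.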
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);
}

inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ add(dst, lhs, rhs);
}

inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ sub(dst, lhs, rhs);
}

inline void And(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ and_(dst, lhs, rhs);
}

inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs,
               Register rhs) {
  __ orr(dst, lhs, rhs);
}

inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ eor(dst, lhs, rhs);
}

inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs,
                     Register rhs) {
  __ mov(dst, rhs);
}

inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result,
                          StoreType type,
                          void (*op)(LiftoffAssembler*, Register, Register,
                                     Register)) {
  LiftoffRegList pinned = {dst_addr, offset_reg, value, result};
  switch (type.value()) {
    case StoreType::kI64Store8:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
                          pinned, &Assembler::ldrexb, &Assembler::strexb, op);
      return;
    case StoreType::kI64Store16:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
                          pinned, &Assembler::ldrexh, &Assembler::strexh, op);
      return;
    case StoreType::kI64Store32:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value, result,
                          pinned, &Assembler::ldrex, &Assembler::strex, op);
      return;
    default:
      UNREACHABLE();
  }
}

inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr,
                       Register offset_reg, uint32_t offset_imm,
                       LiftoffRegister value,
                       base::Optional<LiftoffRegister> result,
                       void (*op)(LiftoffAssembler*, LiftoffRegister,
                                  LiftoffRegister, LiftoffRegister)) {
  // strexd stores a 64 bit value from two registers. The first register needs
  // to have an even index, e.g. r8, the second register needs to be the one
  // with the next higher index, e.g. r9 if the first register is r8. In the
  // following code we use the fixed register pair r8/r9 to make the code here
  // simpler, even though other register pairs would also be possible.
  constexpr Register dst_low = r8;
  constexpr Register dst_high = r9;

  // Make sure {dst_low} and {dst_high} are not occupied by any other value.
  Register value_low = value.low_gp();
  Register value_high = value.high_gp();
  LiftoffRegList pinned = {dst_addr, offset_reg, value_low,
                           value_high, dst_low, dst_high};
  __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high},
                   pinned);
  pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};
  __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high},
                   pinned);
  pinned = pinned | LiftoffRegList{dst_addr, offset_reg, value_low, value_high};

  // Make sure that {result}, if it exists, also does not overlap with
  // {dst_low} and {dst_high}. We don't have to transfer the value stored in
  // {result}.
  Register result_low = no_reg;
  Register result_high = no_reg;
  if (result.has_value()) {
    result_low = result.value().low_gp();
    if (pinned.has(result_low)) {
      result_low = __ GetUnusedRegister(kGpReg, pinned).gp();
    }
    pinned.set(result_low);

    result_high = result.value().high_gp();
    if (pinned.has(result_high)) {
      result_high = __ GetUnusedRegister(kGpReg, pinned).gp();
    }
    pinned.set(result_high);
  }

  Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, &temps, dst_addr, offset_reg, offset_imm);

  __ dmb(ISH);
  Label retry;
  __ bind(&retry);
  // {ldrexd} is needed here so that the {strexd} instruction below can
  // succeed. We don't need the value we are reading. We use {dst_low} and
  // {dst_high} as the destination registers because {ldrexd} has the same
  // restrictions on registers as {strexd}, see the comment above.
  __ ldrexd(dst_low, dst_high, actual_addr);
  if (result.has_value()) {
    __ mov(result_low, dst_low);
    __ mov(result_high, dst_high);
  }
  op(lasm, LiftoffRegister::ForPair(dst_low, dst_high),
     LiftoffRegister::ForPair(dst_low, dst_high),
     LiftoffRegister::ForPair(value_low, value_high));
  __ strexd(store_result, dst_low, dst_high, actual_addr);
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);

  if (result.has_value()) {
    if (result_low != result.value().low_gp()) {
      __ mov(result.value().low_gp(), result_low);
    }
    if (result_high != result.value().high_gp()) {
      __ mov(result.value().high_gp(), result_high);
    }
  }
}

inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst,
                     LiftoffRegister, LiftoffRegister src) {
  __ mov(dst.low_gp(), src.low_gp());
  __ mov(dst.high_gp(), src.high_gp());
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  if (type.value() != LoadType::kI64Load) {
    Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
    dmb(ISH);
    return;
  }
  // ldrexd loads a 64 bit word into two registers. The first register needs to
  // have an even index, e.g. r8, the second register needs to be the one with
  // the next higher index, e.g. r9 if the first register is r8. In the
  // following code we use the fixed register pair r8/r9 to make the code here
  // simpler, even though other register pairs would also be possible.
  constexpr Register dst_low = r8;
  constexpr Register dst_high = r9;
  SpillRegisters(dst_low, dst_high);
  {
    UseScratchRegisterScope temps(this);
    Register actual_addr = liftoff::CalculateActualAddress(
        this, &temps, src_addr, offset_reg, offset_imm);
    ldrexd(dst_low, dst_high, actual_addr);
    dmb(ISH);
  }

  ParallelRegisterMove(
      {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kI64}});
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {},
                        liftoff::I64Store);
    return;
  }

  dmb(ISH);
  Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true);
  dmb(ISH);
  return;
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Binop<&Assembler::add, &Assembler::adc>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Add);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Sub);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Binop<&Assembler::and_, &Assembler::and_>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::And);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Binop<&Assembler::orr, &Assembler::orr>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Or);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Binop<&Assembler::eor, &Assembler::eor>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Xor);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value, {result},
                        liftoff::I64Store);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Exchange);
}

namespace liftoff {
#define __ lasm->

inline void AtomicI64CompareExchange(LiftoffAssembler* lasm,
                                     Register dst_addr_reg, Register offset_reg,
                                     uint32_t offset_imm,
                                     LiftoffRegister expected,
                                     LiftoffRegister new_value,
                                     LiftoffRegister result) {
  // To implement I64AtomicCompareExchange we need nearly all registers, and
  // some of them have special constraints, e.g. for {new_value} and {result}
  // the low-word register has to have an even register code, and the high
  // word has to be in the next higher register. To avoid complicated register
  // allocation code here, we just assign fixed registers to all values here,
  // and then move all values into the correct register.
  Register dst_addr = r0;
  Register offset = r1;
  Register result_low = r4;
  Register result_high = r5;
  Register new_value_low = r2;
  Register new_value_high = r3;
  Register store_result = r6;
  Register expected_low = r8;
  Register expected_high = r9;

  // We spill all registers, so that we can re-assign them afterwards.
  __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low,
                    new_value_high, store_result, expected_low, expected_high);

  __ ParallelRegisterMove(
      {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value,
        kI64},
       {LiftoffRegister::ForPair(expected_low, expected_high), expected, kI64},
       {dst_addr, dst_addr_reg, kI32},
       {offset, offset_reg != no_reg ? offset_reg : offset, kI32}});

  {
    UseScratchRegisterScope temps(lasm);
    Register temp = liftoff::CalculateActualAddress(
        lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset,
        offset_imm, dst_addr);
    // Make sure the actual address is stored in the right register.
    DCHECK_EQ(dst_addr, temp);
    USE(temp);
  }

  Label retry;
  Label done;
  __ dmb(ISH);
  __ bind(&retry);
  __ ldrexd(result_low, result_high, dst_addr);
  __ cmp(result_low, expected_low);
  __ b(ne, &done);
  __ cmp(result_high, expected_high);
  __ b(ne, &done);
  __ strexd(store_result, new_value_low, new_value_high, dst_addr);
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);
  __ bind(&done);

  __ ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(result_low, result_high), kI64}});
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm,
                                      expected, new_value, result);
    return;
  }

  // The other versions of CompareExchange can share code, but need special
  // load and store instructions.
  void (Assembler::*load)(Register, Register, Condition) = nullptr;
  void (Assembler::*store)(Register, Register, Register, Condition) = nullptr;

  LiftoffRegList pinned = {dst_addr, offset_reg};
  // We need to remember the high word of {result}, so we can set it to zero in
  // the end if necessary.
  Register result_high = no_reg;
  switch (type.value()) {
    case StoreType::kI64Store8:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      load = &Assembler::ldrexb;
      store = &Assembler::strexb;
      // We have to clear the high bits of {expected}, as we can only do a
      // 32-bit comparison. If the {expected} register is used, we spill it
      // first.
      if (cache_state()->is_used(expected)) {
        SpillRegister(expected);
      }
      uxtb(expected.gp(), expected.gp());
      break;
    case StoreType::kI64Store16:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      load = &Assembler::ldrexh;
      store = &Assembler::strexh;
      // We have to clear the high bits of {expected}, as we can only do a
      // 32-bit comparison. If the {expected} register is used, we spill it
      // first.
      if (cache_state()->is_used(expected)) {
        SpillRegister(expected);
      }
      uxth(expected.gp(), expected.gp());
      break;
    case StoreType::kI64Store32:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      load = &Assembler::ldrex;
      store = &Assembler::strex;
      break;
    default:
      UNREACHABLE();
  }
  pinned.set(new_value);
  pinned.set(expected);

  Register result_reg = result.gp();
  if (pinned.has(result)) {
    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
  }
  pinned.set(LiftoffRegister(result_reg));
  Register store_result = GetUnusedRegister(kGpReg, pinned).gp();

  UseScratchRegisterScope temps(this);
  Register actual_addr = liftoff::CalculateActualAddress(
      this, &temps, dst_addr, offset_reg, offset_imm);

  Label retry;
  Label done;
  dmb(ISH);
  bind(&retry);
  (this->*load)(result_reg, actual_addr, al);
  cmp(result_reg, expected.gp());
  b(ne, &done);
  (this->*store)(store_result, new_value.gp(), actual_addr, al);
  cmp(store_result, Operand(0));
  b(ne, &retry);
  dmb(ISH);
  bind(&done);

  if (result.gp() != result_reg) {
    mov(result.gp(), result_reg);
  }
  if (result_high != no_reg) {
    LoadConstant(LiftoffRegister(result_high), WasmValue(0));
  }
}

void LiftoffAssembler::AtomicFence() { dmb(ISH); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueKind kind) {
  MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize);
  liftoff::Load(this, dst, src, kind);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueKind kind) {
  MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize);
  liftoff::Store(this, src, dst, kind);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueKind kind) {
  MemOperand src(sp, offset);
  liftoff::Load(this, dst, src, kind);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueKind kind) {
  DCHECK_NE(dst_offset, src_offset);
  LiftoffRegister reg = GetUnusedRegister(reg_class_for(kind), {});
  Fill(reg, src_offset, kind);
  Spill(dst_offset, reg, kind);
}

void LiftoffAssembler::Move(Register dst, Register src, ValueKind kind) {
  DCHECK_NE(dst, src);
  DCHECK(kind == kI32 || is_reference(kind));
  TurboAssembler::Move(dst, src);
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueKind kind) {
  DCHECK_NE(dst, src);
  if (kind == kF32) {
    vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
  } else if (kind == kF64) {
    vmov(dst, src);
  } else {
    DCHECK_EQ(kS128, kind);
    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueKind kind) {
  // The {str} instruction needs a temp register when the immediate in the
  // provided MemOperand does not fit into 12 bits. This happens for large
  // stack frames. This DCHECK checks that the temp register is available when
  // needed.
  DCHECK(UseScratchRegisterScope{this}.CanAcquire());
  DCHECK_LT(0, offset);
  RecordUsedSpillOffset(offset);
  MemOperand dst(fp, -offset);
  liftoff::Store(this, reg, dst, kind);
}
1421
Spill(int offset,WasmValue value)1422 void LiftoffAssembler::Spill(int offset, WasmValue value) {
1423 RecordUsedSpillOffset(offset);
1424 MemOperand dst = liftoff::GetStackSlot(offset);
1425 UseScratchRegisterScope temps(this);
1426 Register src = no_reg;
1427 // str itself will need the scratch register if more than one instruction is
1428 // required to encode the offset, so we cannot use it as the source then.
1429 if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
1430 src = GetUnusedRegister(kGpReg, {}).gp();
1431 } else {
1432 src = temps.Acquire();
1433 }
1434 switch (value.type().kind()) {
1435 case kI32:
1436 mov(src, Operand(value.to_i32()));
1437 str(src, dst);
1438 break;
1439 case kI64: {
1440 int32_t low_word = value.to_i64();
1441 mov(src, Operand(low_word));
1442 str(src, liftoff::GetHalfStackSlot(offset, kLowWord));
1443 int32_t high_word = value.to_i64() >> 32;
1444 mov(src, Operand(high_word));
1445 str(src, liftoff::GetHalfStackSlot(offset, kHighWord));
1446 break;
1447 }
1448 default:
1449 // We do not track f32 and f64 constants, hence they are unreachable.
1450 UNREACHABLE();
1451 }
1452 }
1453
Fill(LiftoffRegister reg,int offset,ValueKind kind)1454 void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueKind kind) {
1455 liftoff::Load(this, reg, liftoff::GetStackSlot(offset), kind);
1456 }
1457
FillI64Half(Register reg,int offset,RegPairHalf half)1458 void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
1459 ldr(reg, liftoff::GetHalfStackSlot(offset, half));
1460 }
1461
FillStackSlotsWithZero(int start,int size)1462 void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
1463 DCHECK_LT(0, size);
1464 DCHECK_EQ(0, size % 4);
1465 RecordUsedSpillOffset(start + size);
1466
1467 // We need a zero reg. Always use r0 for that; push it first so that its
1468 // value can be restored afterwards.
1469 push(r0);
1470 mov(r0, Operand(0));
1471
1472 if (size <= 36) {
1473 // Special straight-line code for up to 9 words. Generates one
1474 // instruction per word.
1475 for (int offset = 4; offset <= size; offset += 4) {
1476 str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord));
1477 }
1478 } else {
1479 // General case for bigger counts (9 instructions).
1480 // Use r1 for start address (inclusive), r2 for end address (exclusive).
1481 push(r1);
1482 push(r2);
1483 sub(r1, fp, Operand(start + size));
1484 sub(r2, fp, Operand(start));
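// At this point r1 = fp - (start + size) and r2 = fp - start, so the loop
// below stores one zero word at r1 (post-incrementing it) until r1 reaches
// r2, clearing exactly size / 4 words.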
1485
1486 Label loop;
1487 bind(&loop);
1488 str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex));
1489 cmp(r1, r2);
1490 b(&loop, ne);
1491
1492 pop(r2);
1493 pop(r1);
1494 }
1495
1496 pop(r0);
1497 }
1498
1499 #define I32_BINOP(name, instruction) \
1500 void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
1501 Register rhs) { \
1502 instruction(dst, lhs, rhs); \
1503 }
1504 #define I32_BINOP_I(name, instruction) \
1505 I32_BINOP(name, instruction) \
1506 void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
1507 int32_t imm) { \
1508 instruction(dst, lhs, Operand(imm)); \
1509 }
1510 #define I32_SHIFTOP(name, instruction) \
1511 void LiftoffAssembler::emit_##name(Register dst, Register src, \
1512 Register amount) { \
1513 UseScratchRegisterScope temps(this); \
1514 Register scratch = temps.Acquire(); \
1515 and_(scratch, amount, Operand(0x1f)); \
1516 instruction(dst, src, Operand(scratch)); \
1517 } \
1518 void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
1519 int32_t amount) { \
1520 if (V8_LIKELY((amount & 31) != 0)) { \
1521 instruction(dst, src, Operand(amount & 31)); \
1522 } else if (dst != src) { \
1523 mov(dst, src); \
1524 } \
1525 }
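// Note on the masking above: wasm defines 32-bit shift counts modulo 32. For
// the register form, the explicit "and" with 0x1f is needed because ARM takes
// a register shift count from the bottom byte, so e.g. a count of 32 would
// otherwise shift everything out. For the immediate form, a masked count of 0
// is emitted as a plain move, since the classic ARM encodings of LSR/ASR with
// immediate #0 actually denote a shift by 32.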
1526 #define FP32_UNOP(name, instruction) \
1527 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1528 instruction(liftoff::GetFloatRegister(dst), \
1529 liftoff::GetFloatRegister(src)); \
1530 }
1531 #define FP32_BINOP(name, instruction) \
1532 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1533 DoubleRegister rhs) { \
1534 instruction(liftoff::GetFloatRegister(dst), \
1535 liftoff::GetFloatRegister(lhs), \
1536 liftoff::GetFloatRegister(rhs)); \
1537 }
1538 #define FP64_UNOP(name, instruction) \
1539 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister src) { \
1540 instruction(dst, src); \
1541 }
1542 #define FP64_BINOP(name, instruction) \
1543 void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
1544 DoubleRegister rhs) { \
1545 instruction(dst, lhs, rhs); \
1546 }
1547
I32_BINOP_I(i32_add,add)1548 I32_BINOP_I(i32_add, add)
1549 I32_BINOP_I(i32_sub, sub)
1550 I32_BINOP(i32_mul, mul)
1551 I32_BINOP_I(i32_and, and_)
1552 I32_BINOP_I(i32_or, orr)
1553 I32_BINOP_I(i32_xor, eor)
1554 I32_SHIFTOP(i32_shl, lsl)
1555 I32_SHIFTOP(i32_sar, asr)
1556 I32_SHIFTOP(i32_shr, lsr)
1557 FP32_BINOP(f32_add, vadd)
1558 FP32_BINOP(f32_sub, vsub)
1559 FP32_BINOP(f32_mul, vmul)
1560 FP32_BINOP(f32_div, vdiv)
1561 FP32_UNOP(f32_abs, vabs)
1562 FP32_UNOP(f32_neg, vneg)
1563 FP32_UNOP(f32_sqrt, vsqrt)
1564 FP64_BINOP(f64_add, vadd)
1565 FP64_BINOP(f64_sub, vsub)
1566 FP64_BINOP(f64_mul, vmul)
1567 FP64_BINOP(f64_div, vdiv)
1568 FP64_UNOP(f64_abs, vabs)
1569 FP64_UNOP(f64_neg, vneg)
1570 FP64_UNOP(f64_sqrt, vsqrt)
1571
1572 #undef I32_BINOP
#undef I32_BINOP_I
1573 #undef I32_SHIFTOP
1574 #undef FP32_UNOP
1575 #undef FP32_BINOP
1576 #undef FP64_UNOP
1577 #undef FP64_BINOP
1578
1579 void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
1580 clz(dst, src);
1581 }
1582
emit_i32_ctz(Register dst,Register src)1583 void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
1584 rbit(dst, src);
1585 clz(dst, dst);
1586 }
1587
1588 namespace liftoff {
GeneratePopCnt(Assembler * assm,Register dst,Register src,Register scratch1,Register scratch2)1589 inline void GeneratePopCnt(Assembler* assm, Register dst, Register src,
1590 Register scratch1, Register scratch2) {
1591 DCHECK(!AreAliased(dst, scratch1, scratch2));
1592 if (src == scratch1) std::swap(scratch1, scratch2);
1593 // x = x - ((x & (0x55555555 << 1)) >> 1)
1594 assm->and_(scratch1, src, Operand(0xaaaaaaaa));
1595 assm->sub(dst, src, Operand(scratch1, LSR, 1));
1596 // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
1597 assm->mov(scratch1, Operand(0x33333333));
1598 assm->and_(scratch2, dst, Operand(scratch1, LSL, 2));
1599 assm->and_(scratch1, dst, scratch1);
1600 assm->add(dst, scratch1, Operand(scratch2, LSR, 2));
1601 // x = (x + (x >> 4)) & 0x0F0F0F0F
1602 assm->add(dst, dst, Operand(dst, LSR, 4));
1603 assm->and_(dst, dst, Operand(0x0f0f0f0f));
1604 // x = x + (x >> 8)
1605 assm->add(dst, dst, Operand(dst, LSR, 8));
1606 // x = x + (x >> 16)
1607 assm->add(dst, dst, Operand(dst, LSR, 16));
1608 // x = x & 0x3F
1609 assm->and_(dst, dst, Operand(0x3f));
1610 }
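// This is the usual SWAR population count: after the first three steps each
// byte of {dst} holds the popcount of the corresponding byte of {src} (a
// value in 0..8); the last two adds fold the four byte counts into the low
// byte, and the final mask with 0x3f extracts the total (at most 32).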
1611 } // namespace liftoff
1612
emit_i32_popcnt(Register dst,Register src)1613 bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
1614 LiftoffRegList pinned = {dst};
1615 Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1616 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1617 liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2);
1618 return true;
1619 }
1620
emit_i32_divs(Register dst,Register lhs,Register rhs,Label * trap_div_by_zero,Label * trap_div_unrepresentable)1621 void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
1622 Label* trap_div_by_zero,
1623 Label* trap_div_unrepresentable) {
1624 if (!CpuFeatures::IsSupported(SUDIV)) {
1625 bailout(kMissingCPUFeature, "i32_divs");
1626 return;
1627 }
1628 CpuFeatureScope scope(this, SUDIV);
1629 // Issue division early so we can perform the trapping checks whilst it
1630 // completes.
1631 bool speculative_sdiv = dst != lhs && dst != rhs;
1632 if (speculative_sdiv) {
1633 sdiv(dst, lhs, rhs);
1634 }
1635 Label noTrap;
1636 // Check for division by zero.
1637 cmp(rhs, Operand(0));
1638 b(trap_div_by_zero, eq);
1639 // Check for kMinInt / -1. This is unrepresentable.
1640 cmp(rhs, Operand(-1));
1641 b(&noTrap, ne);
1642 cmp(lhs, Operand(kMinInt));
1643 b(trap_div_unrepresentable, eq);
1644 bind(&noTrap);
1645 if (!speculative_sdiv) {
1646 sdiv(dst, lhs, rhs);
1647 }
1648 }
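// Note: kMinInt / -1 would be +2^31, which is not representable in int32, so
// wasm requires a trap here; sdiv itself would just wrap and return kMinInt.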
1649
emit_i32_divu(Register dst,Register lhs,Register rhs,Label * trap_div_by_zero)1650 void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
1651 Label* trap_div_by_zero) {
1652 if (!CpuFeatures::IsSupported(SUDIV)) {
1653 bailout(kMissingCPUFeature, "i32_divu");
1654 return;
1655 }
1656 CpuFeatureScope scope(this, SUDIV);
1657 // Check for division by zero.
1658 cmp(rhs, Operand(0));
1659 b(trap_div_by_zero, eq);
1660 udiv(dst, lhs, rhs);
1661 }
1662
emit_i32_rems(Register dst,Register lhs,Register rhs,Label * trap_div_by_zero)1663 void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
1664 Label* trap_div_by_zero) {
1665 if (!CpuFeatures::IsSupported(SUDIV)) {
1666 // When this case is handled, a check for ARMv7 is required to use mls.
1667 // Mls support is implied with SUDIV support.
1668 bailout(kMissingCPUFeature, "i32_rems");
1669 return;
1670 }
1671 CpuFeatureScope scope(this, SUDIV);
1672 // No need to check kMinInt / -1: sdiv wraps and yields kMinInt, and since
1673 // kMinInt * -1 wraps back to kMinInt, the mls result (the remainder) is 0.
1674 UseScratchRegisterScope temps(this);
1675 Register scratch = temps.Acquire();
1676 sdiv(scratch, lhs, rhs);
1677 // Check for division by zero.
1678 cmp(rhs, Operand(0));
1679 b(trap_div_by_zero, eq);
1680 // Compute remainder.
1681 mls(dst, scratch, rhs, lhs);
1682 }
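// mls computes dst = lhs - scratch * rhs, i.e. lhs - (lhs / rhs) * rhs, which
// is the remainder with the sign of the dividend, as i32.rem_s requires.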
1683
emit_i32_remu(Register dst,Register lhs,Register rhs,Label * trap_div_by_zero)1684 void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
1685 Label* trap_div_by_zero) {
1686 if (!CpuFeatures::IsSupported(SUDIV)) {
1687 // When this case is handled, a check for ARMv7 is required to use mls.
1688 // Mls support is implied with SUDIV support.
1689 bailout(kMissingCPUFeature, "i32_remu");
1690 return;
1691 }
1692 CpuFeatureScope scope(this, SUDIV);
1693 // Unlike the signed case, there is no kMinInt / -1 corner case for unsigned
1694 // division, so only the division-by-zero check is needed.
1695 UseScratchRegisterScope temps(this);
1696 Register scratch = temps.Acquire();
1697 udiv(scratch, lhs, rhs);
1698 // Check for division by zero.
1699 cmp(rhs, Operand(0));
1700 b(trap_div_by_zero, eq);
1701 // Compute remainder.
1702 mls(dst, scratch, rhs, lhs);
1703 }
1704
emit_i64_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1705 void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
1706 LiftoffRegister rhs) {
1707 liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
1708 }
1709
emit_i64_addi(LiftoffRegister dst,LiftoffRegister lhs,int64_t imm)1710 void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
1711 int64_t imm) {
1712 liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
1713 }
1714
emit_i64_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1715 void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
1716 LiftoffRegister rhs) {
1717 liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs);
1718 }
1719
emit_i64_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)1720 void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
1721 LiftoffRegister rhs) {
1722 // Idea:
1723 // [ lhs_hi | lhs_lo ] * [ rhs_hi | rhs_lo ]
1724 // = [ lhs_hi * rhs_lo | ] (32 bit mul, shift 32)
1725 // + [ lhs_lo * rhs_hi | ] (32 bit mul, shift 32)
1726 // + [ lhs_lo * rhs_lo ] (32x32->64 mul, shift 0)
1727 UseScratchRegisterScope temps(this);
1728 Register scratch = temps.Acquire();
1729 // scratch = lhs_hi * rhs_lo
1730 mul(scratch, lhs.high_gp(), rhs.low_gp());
1731 // scratch += lhs_lo * rhs_hi
1732 mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch);
1733 // TODO(arm): use umlal once implemented correctly in the simulator.
1734 // [dst_hi|dst_lo] = lhs_lo * rhs_lo
1735 umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp());
1736 // dst_hi += scratch
1737 add(dst.high_gp(), dst.high_gp(), scratch);
1738 }
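// Put differently: with lhs = lhs_hi * 2^32 + lhs_lo (and similarly for rhs),
// the low 64 bits of the product are
//   lhs_lo * rhs_lo + ((lhs_hi * rhs_lo + lhs_lo * rhs_hi) << 32);
// umull provides the first term, and {scratch} holds the bracketed sum
// (modulo 2^32), which is then added into the high word.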
1739
emit_i64_divs(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero,Label * trap_div_unrepresentable)1740 bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
1741 LiftoffRegister rhs,
1742 Label* trap_div_by_zero,
1743 Label* trap_div_unrepresentable) {
1744 return false;
1745 }
1746
emit_i64_divu(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1747 bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
1748 LiftoffRegister rhs,
1749 Label* trap_div_by_zero) {
1750 return false;
1751 }
1752
emit_i64_rems(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1753 bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
1754 LiftoffRegister rhs,
1755 Label* trap_div_by_zero) {
1756 return false;
1757 }
1758
emit_i64_remu(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs,Label * trap_div_by_zero)1759 bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
1760 LiftoffRegister rhs,
1761 Label* trap_div_by_zero) {
1762 return false;
1763 }
1764
emit_i64_shl(LiftoffRegister dst,LiftoffRegister src,Register amount)1765 void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
1766 Register amount) {
1767 liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount);
1768 }
1769
emit_i64_shli(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1770 void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
1771 int32_t amount) {
1772 UseScratchRegisterScope temps(this);
1773 // {src.low_gp()} will still be needed after writing {dst.high_gp()}.
1774 Register src_low =
1775 liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps);
1776
1777 LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63);
1778 }
1779
emit_i64_sar(LiftoffRegister dst,LiftoffRegister src,Register amount)1780 void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
1781 Register amount) {
1782 liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount);
1783 }
1784
emit_i64_sari(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1785 void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
1786 int32_t amount) {
1787 UseScratchRegisterScope temps(this);
1788 // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1789 Register src_high =
1790 liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1791
1792 AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1793 }
1794
emit_i64_shr(LiftoffRegister dst,LiftoffRegister src,Register amount)1795 void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
1796 Register amount) {
1797 liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount);
1798 }
1799
emit_i64_shri(LiftoffRegister dst,LiftoffRegister src,int32_t amount)1800 void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
1801 int32_t amount) {
1802 UseScratchRegisterScope temps(this);
1803 // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
1804 Register src_high =
1805 liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
1806
1807 LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
1808 }
1809
emit_i64_clz(LiftoffRegister dst,LiftoffRegister src)1810 void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
1811 // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
1812 Label done;
1813 Label high_is_zero;
1814 cmp(src.high_gp(), Operand(0));
1815 b(&high_is_zero, eq);
1816
1817 clz(dst.low_gp(), src.high_gp());
1818 jmp(&done);
1819
1820 bind(&high_is_zero);
1821 clz(dst.low_gp(), src.low_gp());
1822 add(dst.low_gp(), dst.low_gp(), Operand(32));
1823
1824 bind(&done);
1825 mov(dst.high_gp(), Operand(0)); // High word of result is always 0.
1826 }
1827
emit_i64_ctz(LiftoffRegister dst,LiftoffRegister src)1828 void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
1829 // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
1830 // CTZ32(x) = CLZ(RBIT(x))
1831 Label done;
1832 Label low_is_zero;
1833 cmp(src.low_gp(), Operand(0));
1834 b(&low_is_zero, eq);
1835
1836 rbit(dst.low_gp(), src.low_gp());
1837 clz(dst.low_gp(), dst.low_gp());
1838 jmp(&done);
1839
1840 bind(&low_is_zero);
1841 rbit(dst.low_gp(), src.high_gp());
1842 clz(dst.low_gp(), dst.low_gp());
1843 add(dst.low_gp(), dst.low_gp(), Operand(32));
1844
1845 bind(&done);
1846 mov(dst.high_gp(), Operand(0)); // High word of result is always 0.
1847 }
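// rbit reverses the bit order, so the leading zeros of rbit(x) are exactly
// the trailing zeros of x; e.g. for x = 8 (0b1000), rbit(x) = 0x10000000 and
// clz of that gives 3.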
1848
emit_i64_popcnt(LiftoffRegister dst,LiftoffRegister src)1849 bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
1850 LiftoffRegister src) {
1851 // Produce partial popcnts in the two dst registers, making sure not to
1852 // overwrite the second src register before using it.
1853 Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
1854 Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
1855 LiftoffRegList pinned = {dst, src2};
1856 Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
1857 Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
1858 liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2);
1859 liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2);
1860 // Now add the two into the lower dst reg and clear the higher dst reg.
1861 add(dst.low_gp(), dst.low_gp(), dst.high_gp());
1862 mov(dst.high_gp(), Operand(0));
1863 return true;
1864 }
1865
IncrementSmi(LiftoffRegister dst,int offset)1866 void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
1867 UseScratchRegisterScope temps(this);
1868 Register scratch = temps.Acquire();
1869 ldr(scratch, MemOperand(dst.gp(), offset));
1870 add(scratch, scratch, Operand(Smi::FromInt(1)));
1871 str(scratch, MemOperand(dst.gp(), offset));
1872 }
1873
emit_f32_ceil(DoubleRegister dst,DoubleRegister src)1874 bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
1875 if (CpuFeatures::IsSupported(ARMv8)) {
1876 CpuFeatureScope scope(this, ARMv8);
1877 vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1878 return true;
1879 }
1880 return false;
1881 }
1882
emit_f32_floor(DoubleRegister dst,DoubleRegister src)1883 bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
1884 if (CpuFeatures::IsSupported(ARMv8)) {
1885 CpuFeatureScope scope(this, ARMv8);
1886 vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1887 return true;
1888 }
1889 return false;
1890 }
1891
emit_f32_trunc(DoubleRegister dst,DoubleRegister src)1892 bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
1893 if (CpuFeatures::IsSupported(ARMv8)) {
1894 CpuFeatureScope scope(this, ARMv8);
1895 vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1896 return true;
1897 }
1898 return false;
1899 }
1900
emit_f32_nearest_int(DoubleRegister dst,DoubleRegister src)1901 bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
1902 DoubleRegister src) {
1903 if (CpuFeatures::IsSupported(ARMv8)) {
1904 CpuFeatureScope scope(this, ARMv8);
1905 vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
1906 return true;
1907 }
1908 return false;
1909 }
1910
emit_f32_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1911 void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
1912 DoubleRegister rhs) {
1913 liftoff::EmitFloatMinOrMax(
1914 this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1915 liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin);
1916 }
1917
emit_f32_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1918 void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
1919 DoubleRegister rhs) {
1920 liftoff::EmitFloatMinOrMax(
1921 this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
1922 liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax);
1923 }
1924
emit_f64_ceil(DoubleRegister dst,DoubleRegister src)1925 bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
1926 if (CpuFeatures::IsSupported(ARMv8)) {
1927 CpuFeatureScope scope(this, ARMv8);
1928 vrintp(dst, src);
1929 return true;
1930 }
1931 return false;
1932 }
1933
emit_f64_floor(DoubleRegister dst,DoubleRegister src)1934 bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
1935 if (CpuFeatures::IsSupported(ARMv8)) {
1936 CpuFeatureScope scope(this, ARMv8);
1937 vrintm(dst, src);
1938 return true;
1939 }
1940 return false;
1941 }
1942
emit_f64_trunc(DoubleRegister dst,DoubleRegister src)1943 bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
1944 if (CpuFeatures::IsSupported(ARMv8)) {
1945 CpuFeatureScope scope(this, ARMv8);
1946 vrintz(dst, src);
1947 return true;
1948 }
1949 return false;
1950 }
1951
emit_f64_nearest_int(DoubleRegister dst,DoubleRegister src)1952 bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
1953 DoubleRegister src) {
1954 if (CpuFeatures::IsSupported(ARMv8)) {
1955 CpuFeatureScope scope(this, ARMv8);
1956 vrintn(dst, src);
1957 return true;
1958 }
1959 return false;
1960 }
1961
emit_f64_min(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1962 void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
1963 DoubleRegister rhs) {
1964 liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin);
1965 }
1966
emit_f64_max(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1967 void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
1968 DoubleRegister rhs) {
1969 liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax);
1970 }
1971
emit_f32_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1972 void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
1973 DoubleRegister rhs) {
1974 constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
1975 UseScratchRegisterScope temps(this);
1976 Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1977 Register scratch2 = temps.Acquire();
1978 VmovLow(scratch, lhs);
1979 // Clear sign bit in {scratch}.
1980 bic(scratch, scratch, Operand(kF32SignBit));
1981 VmovLow(scratch2, rhs);
1982 // Isolate sign bit in {scratch2}.
1983 and_(scratch2, scratch2, Operand(kF32SignBit));
1984 // Combine {scratch2} into {scratch}.
1985 orr(scratch, scratch, scratch2);
1986 VmovLow(dst, scratch);
1987 }
1988
emit_f64_copysign(DoubleRegister dst,DoubleRegister lhs,DoubleRegister rhs)1989 void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs,
1990 DoubleRegister rhs) {
1991 constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31;
1992 // On arm, we cannot hold the whole f64 value in a gp register, so we just
1993 // operate on the upper half (UH).
1994 UseScratchRegisterScope temps(this);
1995 Register scratch = GetUnusedRegister(kGpReg, {}).gp();
1996 Register scratch2 = temps.Acquire();
1997 VmovHigh(scratch, lhs);
1998 // Clear sign bit in {scratch}.
1999 bic(scratch, scratch, Operand(kF64SignBitHighWord));
2000 VmovHigh(scratch2, rhs);
2001 // Isolate sign bit in {scratch2}.
2002 and_(scratch2, scratch2, Operand(kF64SignBitHighWord));
2003 // Combine {scratch2} into {scratch}.
2004 orr(scratch, scratch, scratch2);
2005 vmov(dst, lhs);
2006 VmovHigh(dst, scratch);
2007 }
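// The sign of an IEEE 754 double is bit 63, i.e. the MSB of the upper 32-bit
// word, so only the high word needs to be touched to copy the sign.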
2008
emit_type_conversion(WasmOpcode opcode,LiftoffRegister dst,LiftoffRegister src,Label * trap)2009 bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode,
2010 LiftoffRegister dst,
2011 LiftoffRegister src, Label* trap) {
2012 switch (opcode) {
2013 case kExprI32ConvertI64:
2014 TurboAssembler::Move(dst.gp(), src.low_gp());
2015 return true;
2016 case kExprI32SConvertF32: {
2017 UseScratchRegisterScope temps(this);
2018 SwVfpRegister scratch_f = temps.AcquireS();
2019 vcvt_s32_f32(
2020 scratch_f,
2021 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero.
2022 vmov(dst.gp(), scratch_f);
2023 // Check underflow and NaN.
2024 vmov(scratch_f, Float32(static_cast<float>(INT32_MIN)));
2025 VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2026 b(trap, lt);
2027 // Check overflow.
2028 cmp(dst.gp(), Operand(-1));
2029 b(trap, vs);
2030 return true;
2031 }
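// A sketch of why these checks suffice: vcvt saturates out-of-range inputs to
// INT32_MIN/INT32_MAX (and maps NaN to 0). The float compare against
// INT32_MIN traps inputs below the representable range and also NaN (an
// unordered compare sets V, which makes "lt" hold). The final cmp with -1
// computes dst + 1, so V is set exactly when dst == INT32_MAX, i.e. when the
// conversion saturated at the positive end.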
2032 case kExprI32UConvertF32: {
2033 UseScratchRegisterScope temps(this);
2034 SwVfpRegister scratch_f = temps.AcquireS();
2035 vcvt_u32_f32(
2036 scratch_f,
2037 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero.
2038 vmov(dst.gp(), scratch_f);
2039 // Check underflow and NaN.
2040 vmov(scratch_f, Float32(-1.0f));
2041 VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f);
2042 b(trap, le);
2043 // Check overflow.
2044 cmp(dst.gp(), Operand(-1));
2045 b(trap, eq);
2046 return true;
2047 }
2048 case kExprI32SConvertF64: {
2049 UseScratchRegisterScope temps(this);
2050 SwVfpRegister scratch_f = temps.AcquireS();
2051 vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero.
2052 vmov(dst.gp(), scratch_f);
2053 // Check underflow and NaN.
2054 DwVfpRegister scratch_d = temps.AcquireD();
2055 vmov(scratch_d, base::Double(static_cast<double>(INT32_MIN - 1.0)));
2056 VFPCompareAndSetFlags(src.fp(), scratch_d);
2057 b(trap, le);
2058 // Check overflow.
2059 vmov(scratch_d, base::Double(static_cast<double>(INT32_MAX + 1.0)));
2060 VFPCompareAndSetFlags(src.fp(), scratch_d);
2061 b(trap, ge);
2062 return true;
2063 }
2064 case kExprI32UConvertF64: {
2065 UseScratchRegisterScope temps(this);
2066 SwVfpRegister scratch_f = temps.AcquireS();
2067 vcvt_u32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero.
2068 vmov(dst.gp(), scratch_f);
2069 // Check underflow and NaN.
2070 DwVfpRegister scratch_d = temps.AcquireD();
2071 vmov(scratch_d, base::Double(static_cast<double>(-1.0)));
2072 VFPCompareAndSetFlags(src.fp(), scratch_d);
2073 b(trap, le);
2074 // Check overflow.
2075 vmov(scratch_d, base::Double(static_cast<double>(UINT32_MAX + 1.0)));
2076 VFPCompareAndSetFlags(src.fp(), scratch_d);
2077 b(trap, ge);
2078 return true;
2079 }
2080 case kExprI32SConvertSatF32: {
2081 UseScratchRegisterScope temps(this);
2082 SwVfpRegister scratch_f = temps.AcquireS();
2083 vcvt_s32_f32(
2084 scratch_f,
2085 liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero.
2086 vmov(dst.gp(), scratch_f);
2087 return true;
2088 }
2089 case kExprI32UConvertSatF32: {
2090 UseScratchRegisterScope temps(this);
2091 SwVfpRegister scratch_f = temps.AcquireS();
2092 vcvt_u32_f32(
2093 scratch_f,
2094 liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero.
2095 vmov(dst.gp(), scratch_f);
2096 return true;
2097 }
2098 case kExprI32SConvertSatF64: {
2099 UseScratchRegisterScope temps(this);
2100 SwVfpRegister scratch_f = temps.AcquireS();
2101 vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero.
2102 vmov(dst.gp(), scratch_f);
2103 return true;
2104 }
2105 case kExprI32UConvertSatF64: {
2106 UseScratchRegisterScope temps(this);
2107 SwVfpRegister scratch_f = temps.AcquireS();
2108 vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero.
2109 vmov(dst.gp(), scratch_f);
2110 return true;
2111 }
2112 case kExprI32ReinterpretF32:
2113 vmov(dst.gp(), liftoff::GetFloatRegister(src.fp()));
2114 return true;
2115 case kExprI64SConvertI32:
2116 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2117 mov(dst.high_gp(), Operand(src.gp(), ASR, 31));
2118 return true;
2119 case kExprI64UConvertI32:
2120 if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp());
2121 mov(dst.high_gp(), Operand(0));
2122 return true;
2123 case kExprI64ReinterpretF64:
2124 vmov(dst.low_gp(), dst.high_gp(), src.fp());
2125 return true;
2126 case kExprF32SConvertI32: {
2127 SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2128 vmov(dst_float, src.gp());
2129 vcvt_f32_s32(dst_float, dst_float);
2130 return true;
2131 }
2132 case kExprF32UConvertI32: {
2133 SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp());
2134 vmov(dst_float, src.gp());
2135 vcvt_f32_u32(dst_float, dst_float);
2136 return true;
2137 }
2138 case kExprF32ConvertF64:
2139 vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp());
2140 return true;
2141 case kExprF32ReinterpretI32:
2142 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2143 return true;
2144 case kExprF64SConvertI32: {
2145 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2146 vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2147 return true;
2148 }
2149 case kExprF64UConvertI32: {
2150 vmov(liftoff::GetFloatRegister(dst.fp()), src.gp());
2151 vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp()));
2152 return true;
2153 }
2154 case kExprF64ConvertF32:
2155 vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp()));
2156 return true;
2157 case kExprF64ReinterpretI64:
2158 vmov(dst.fp(), src.low_gp(), src.high_gp());
2159 return true;
2160 case kExprF64SConvertI64:
2161 case kExprF64UConvertI64:
2162 case kExprI64SConvertF32:
2163 case kExprI64UConvertF32:
2164 case kExprI64SConvertSatF32:
2165 case kExprI64UConvertSatF32:
2166 case kExprF32SConvertI64:
2167 case kExprF32UConvertI64:
2168 case kExprI64SConvertF64:
2169 case kExprI64UConvertF64:
2170 case kExprI64SConvertSatF64:
2171 case kExprI64UConvertSatF64:
2172 // These cases can be handled by the C fallback function.
2173 return false;
2174 default:
2175 UNREACHABLE();
2176 }
2177 }
2178
emit_i32_signextend_i8(Register dst,Register src)2179 void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
2180 sxtb(dst, src);
2181 }
2182
emit_i32_signextend_i16(Register dst,Register src)2183 void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
2184 sxth(dst, src);
2185 }
2186
emit_i64_signextend_i8(LiftoffRegister dst,LiftoffRegister src)2187 void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
2188 LiftoffRegister src) {
2189 emit_i32_signextend_i8(dst.low_gp(), src.low_gp());
2190 mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2191 }
2192
emit_i64_signextend_i16(LiftoffRegister dst,LiftoffRegister src)2193 void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
2194 LiftoffRegister src) {
2195 emit_i32_signextend_i16(dst.low_gp(), src.low_gp());
2196 mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
2197 }
2198
emit_i64_signextend_i32(LiftoffRegister dst,LiftoffRegister src)2199 void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
2200 LiftoffRegister src) {
2201 TurboAssembler::Move(dst.low_gp(), src.low_gp());
2202 mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31));
2203 }
2204
emit_jump(Label * label)2205 void LiftoffAssembler::emit_jump(Label* label) { b(label); }
2206
emit_jump(Register target)2207 void LiftoffAssembler::emit_jump(Register target) { bx(target); }
2208
emit_cond_jump(LiftoffCondition liftoff_cond,Label * label,ValueKind kind,Register lhs,Register rhs)2209 void LiftoffAssembler::emit_cond_jump(LiftoffCondition liftoff_cond,
2210 Label* label, ValueKind kind,
2211 Register lhs, Register rhs) {
2212 Condition cond = liftoff::ToCondition(liftoff_cond);
2213
2214 if (rhs == no_reg) {
2215 DCHECK_EQ(kind, kI32);
2216 cmp(lhs, Operand(0));
2217 } else {
2218 DCHECK(kind == kI32 || (is_reference(kind) && (liftoff_cond == kEqual ||
2219 liftoff_cond == kUnequal)));
2220 cmp(lhs, rhs);
2221 }
2222 b(label, cond);
2223 }
2224
emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,Label * label,Register lhs,int32_t imm)2225 void LiftoffAssembler::emit_i32_cond_jumpi(LiftoffCondition liftoff_cond,
2226 Label* label, Register lhs,
2227 int32_t imm) {
2228 Condition cond = liftoff::ToCondition(liftoff_cond);
2229 cmp(lhs, Operand(imm));
2230 b(label, cond);
2231 }
2232
emit_i32_subi_jump_negative(Register value,int subtrahend,Label * result_negative)2233 void LiftoffAssembler::emit_i32_subi_jump_negative(Register value,
2234 int subtrahend,
2235 Label* result_negative) {
2236 sub(value, value, Operand(subtrahend), SetCC);
2237 b(result_negative, mi);
2238 }
2239
emit_i32_eqz(Register dst,Register src)2240 void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
2241 clz(dst, src);
2242 mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2));
2243 }
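// clz yields 32 only for src == 0; shifting right by kRegSizeInBitsLog2 (5)
// therefore produces 1 for a zero input and 0 for everything else.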
2244
emit_i32_set_cond(LiftoffCondition liftoff_cond,Register dst,Register lhs,Register rhs)2245 void LiftoffAssembler::emit_i32_set_cond(LiftoffCondition liftoff_cond,
2246 Register dst, Register lhs,
2247 Register rhs) {
2248 Condition cond = liftoff::ToCondition(liftoff_cond);
2249 cmp(lhs, rhs);
2250 mov(dst, Operand(0), LeaveCC);
2251 mov(dst, Operand(1), LeaveCC, cond);
2252 }
2253
emit_i64_eqz(Register dst,LiftoffRegister src)2254 void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
2255 orr(dst, src.low_gp(), src.high_gp());
2256 clz(dst, dst);
2257 mov(dst, Operand(dst, LSR, 5));
2258 }
2259
emit_i64_set_cond(LiftoffCondition liftoff_cond,Register dst,LiftoffRegister lhs,LiftoffRegister rhs)2260 void LiftoffAssembler::emit_i64_set_cond(LiftoffCondition liftoff_cond,
2261 Register dst, LiftoffRegister lhs,
2262 LiftoffRegister rhs) {
2263 // For signed i64 comparisons, we still need to use unsigned comparison for
2264 // the low word (the only bit carrying signedness information is the MSB in
2265 // the high word).
2266 Condition cond = liftoff::ToCondition(liftoff_cond);
2267 Condition unsigned_cond =
2268 liftoff::ToCondition(liftoff::MakeUnsigned(liftoff_cond));
2269 Label set_cond;
2270 Label cont;
2271 LiftoffRegister dest = LiftoffRegister(dst);
2272 bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs);
2273 if (speculative_move) {
2274 mov(dst, Operand(0));
2275 }
2276 // Compare high word first. If it differs, use it for the set_cond. If it's
2277 // equal, compare the low word and use that for set_cond.
2278 cmp(lhs.high_gp(), rhs.high_gp());
2279 if (unsigned_cond == cond) {
2280 cmp(lhs.low_gp(), rhs.low_gp(), eq);
2281 if (!speculative_move) {
2282 mov(dst, Operand(0));
2283 }
2284 mov(dst, Operand(1), LeaveCC, cond);
2285 } else {
2286 // If the condition predicate for the low differs from that for the high
2287 // word, the conditional move instructions must be separated.
2288 b(ne, &set_cond);
2289 cmp(lhs.low_gp(), rhs.low_gp());
2290 if (!speculative_move) {
2291 mov(dst, Operand(0));
2292 }
2293 mov(dst, Operand(1), LeaveCC, unsigned_cond);
2294 b(&cont);
2295 bind(&set_cond);
2296 if (!speculative_move) {
2297 mov(dst, Operand(0));
2298 }
2299 mov(dst, Operand(1), LeaveCC, cond);
2300 bind(&cont);
2301 }
2302 }
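// Example: for kSignedLessThan with lhs = -1 (0xffffffff:ffffffff) and
// rhs = 0, the high words already differ and the signed compare on them
// decides the result; only when the high words are equal does the unsigned
// low-word compare matter.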
2303
emit_f32_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2304 void LiftoffAssembler::emit_f32_set_cond(LiftoffCondition liftoff_cond,
2305 Register dst, DoubleRegister lhs,
2306 DoubleRegister rhs) {
2307 Condition cond = liftoff::ToCondition(liftoff_cond);
2308 VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs),
2309 liftoff::GetFloatRegister(rhs));
2310 mov(dst, Operand(0), LeaveCC);
2311 mov(dst, Operand(1), LeaveCC, cond);
2312 if (cond != ne) {
2313 // If V flag set, at least one of the arguments was a NaN -> false.
2314 mov(dst, Operand(0), LeaveCC, vs);
2315 }
2316 }
2317
emit_f64_set_cond(LiftoffCondition liftoff_cond,Register dst,DoubleRegister lhs,DoubleRegister rhs)2318 void LiftoffAssembler::emit_f64_set_cond(LiftoffCondition liftoff_cond,
2319 Register dst, DoubleRegister lhs,
2320 DoubleRegister rhs) {
2321 Condition cond = liftoff::ToCondition(liftoff_cond);
2322 VFPCompareAndSetFlags(lhs, rhs);
2323 mov(dst, Operand(0), LeaveCC);
2324 mov(dst, Operand(1), LeaveCC, cond);
2325 if (cond != ne) {
2326 // If V flag set, at least one of the arguments was a NaN -> false.
2327 mov(dst, Operand(0), LeaveCC, vs);
2328 }
2329 }
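// Background: wasm requires every f32/f64 comparison except "ne" to be false
// when either operand is NaN. The unordered VFP compare leaves V set, so the
// result is forced back to 0 in that case; for "ne" the unordered compare
// already yields 1, which is the required answer.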
2330
emit_select(LiftoffRegister dst,Register condition,LiftoffRegister true_value,LiftoffRegister false_value,ValueKind kind)2331 bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
2332 LiftoffRegister true_value,
2333 LiftoffRegister false_value,
2334 ValueKind kind) {
2335 return false;
2336 }
2337
emit_smi_check(Register obj,Label * target,SmiCheckMode mode)2338 void LiftoffAssembler::emit_smi_check(Register obj, Label* target,
2339 SmiCheckMode mode) {
2340 tst(obj, Operand(kSmiTagMask));
2341 Condition condition = mode == kJumpOnSmi ? eq : ne;
2342 b(condition, target);
2343 }
2344
LoadTransform(LiftoffRegister dst,Register src_addr,Register offset_reg,uintptr_t offset_imm,LoadType type,LoadTransformationKind transform,uint32_t * protected_load_pc)2345 void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
2346 Register offset_reg, uintptr_t offset_imm,
2347 LoadType type,
2348 LoadTransformationKind transform,
2349 uint32_t* protected_load_pc) {
2350 UseScratchRegisterScope temps(this);
2351 Register actual_src_addr = liftoff::CalculateActualAddress(
2352 this, &temps, src_addr, offset_reg, offset_imm);
2353 *protected_load_pc = pc_offset();
2354 MachineType memtype = type.mem_type();
2355
2356 if (transform == LoadTransformationKind::kExtend) {
2357 if (memtype == MachineType::Int8()) {
2358 vld1(Neon8, NeonListOperand(dst.low_fp()),
2359 NeonMemOperand(actual_src_addr));
2360 vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
2361 } else if (memtype == MachineType::Uint8()) {
2362 vld1(Neon8, NeonListOperand(dst.low_fp()),
2363 NeonMemOperand(actual_src_addr));
2364 vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
2365 } else if (memtype == MachineType::Int16()) {
2366 vld1(Neon16, NeonListOperand(dst.low_fp()),
2367 NeonMemOperand(actual_src_addr));
2368 vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
2369 } else if (memtype == MachineType::Uint16()) {
2370 vld1(Neon16, NeonListOperand(dst.low_fp()),
2371 NeonMemOperand(actual_src_addr));
2372 vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
2373 } else if (memtype == MachineType::Int32()) {
2374 vld1(Neon32, NeonListOperand(dst.low_fp()),
2375 NeonMemOperand(actual_src_addr));
2376 vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
2377 } else if (memtype == MachineType::Uint32()) {
2378 vld1(Neon32, NeonListOperand(dst.low_fp()),
2379 NeonMemOperand(actual_src_addr));
2380 vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
2381 }
2382 } else if (transform == LoadTransformationKind::kZeroExtend) {
2383 Simd128Register dest = liftoff::GetSimd128Register(dst);
2384 if (memtype == MachineType::Int32()) {
2385 vmov(dest, 0);
2386 vld1s(Neon32, NeonListOperand(dst.low_fp()), 0,
2387 NeonMemOperand(actual_src_addr));
2388 } else {
2389 DCHECK_EQ(MachineType::Int64(), memtype);
2390 vmov(dest.high(), 0);
2391 vld1(Neon64, NeonListOperand(dest.low()),
2392 NeonMemOperand(actual_src_addr));
2393 }
2394 } else {
2395 DCHECK_EQ(LoadTransformationKind::kSplat, transform);
2396 if (memtype == MachineType::Int8()) {
2397 vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
2398 NeonMemOperand(actual_src_addr));
2399 } else if (memtype == MachineType::Int16()) {
2400 vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
2401 NeonMemOperand(actual_src_addr));
2402 } else if (memtype == MachineType::Int32()) {
2403 vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
2404 NeonMemOperand(actual_src_addr));
2405 } else if (memtype == MachineType::Int64()) {
2406 vld1(Neon32, NeonListOperand(dst.low_fp()),
2407 NeonMemOperand(actual_src_addr));
2408 TurboAssembler::Move(dst.high_fp(), dst.low_fp());
2409 }
2410 }
2411 }
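// Summary of the three kinds above: kExtend loads 64 bits and widens every
// lane to twice its width (signed or unsigned vmovl); kZeroExtend loads a
// single 32- or 64-bit element into the low lane and clears the rest; kSplat
// loads one element and replicates it into all lanes (vld1r), with the 64-bit
// case done as a 64-bit load plus a register copy.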
2412
LoadLane(LiftoffRegister dst,LiftoffRegister src,Register addr,Register offset_reg,uintptr_t offset_imm,LoadType type,uint8_t laneidx,uint32_t * protected_load_pc)2413 void LiftoffAssembler::LoadLane(LiftoffRegister dst, LiftoffRegister src,
2414 Register addr, Register offset_reg,
2415 uintptr_t offset_imm, LoadType type,
2416 uint8_t laneidx, uint32_t* protected_load_pc) {
2417 UseScratchRegisterScope temps(this);
2418 Register actual_src_addr = liftoff::CalculateActualAddress(
2419 this, &temps, addr, offset_reg, offset_imm);
2420 TurboAssembler::Move(liftoff::GetSimd128Register(dst),
2421 liftoff::GetSimd128Register(src));
2422 *protected_load_pc = pc_offset();
2423 LoadStoreLaneParams load_params(type.mem_type().representation(), laneidx);
2424 NeonListOperand dst_op =
2425 NeonListOperand(load_params.low_op ? dst.low_fp() : dst.high_fp());
2426 TurboAssembler::LoadLane(load_params.sz, dst_op, load_params.laneidx,
2427 NeonMemOperand(actual_src_addr));
2428 }
2429
StoreLane(Register dst,Register offset,uintptr_t offset_imm,LiftoffRegister src,StoreType type,uint8_t laneidx,uint32_t * protected_store_pc)2430 void LiftoffAssembler::StoreLane(Register dst, Register offset,
2431 uintptr_t offset_imm, LiftoffRegister src,
2432 StoreType type, uint8_t laneidx,
2433 uint32_t* protected_store_pc) {
2434 UseScratchRegisterScope temps(this);
2435 Register actual_dst_addr =
2436 liftoff::CalculateActualAddress(this, &temps, dst, offset, offset_imm);
2437 *protected_store_pc = pc_offset();
2438
2439 LoadStoreLaneParams store_params(type.mem_rep(), laneidx);
2440 NeonListOperand src_op =
2441 NeonListOperand(store_params.low_op ? src.low_fp() : src.high_fp());
2442 TurboAssembler::StoreLane(store_params.sz, src_op, store_params.laneidx,
2443 NeonMemOperand(actual_dst_addr));
2444 }
2445
emit_i8x16_swizzle(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2446 void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
2447 LiftoffRegister lhs,
2448 LiftoffRegister rhs) {
2449 UseScratchRegisterScope temps(this);
2450
2451 NeonListOperand table(liftoff::GetSimd128Register(lhs));
2452 if (dst == lhs) {
2453 // dst will be overwritten, so keep the table somewhere else.
2454 QwNeonRegister tbl = temps.AcquireQ();
2455 TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs));
2456 table = NeonListOperand(tbl);
2457 }
2458
2459 vtbl(dst.low_fp(), table, rhs.low_fp());
2460 vtbl(dst.high_fp(), table, rhs.high_fp());
2461 }
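// vtbl treats each byte of {rhs} as an index into the 16-byte table; indices
// outside 0..15 produce 0, matching the wasm i8x16.swizzle semantics for
// out-of-range lane indices.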
2462
emit_f64x2_splat(LiftoffRegister dst,LiftoffRegister src)2463 void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst,
2464 LiftoffRegister src) {
2465 TurboAssembler::Move(dst.low_fp(), src.fp());
2466 TurboAssembler::Move(dst.high_fp(), src.fp());
2467 }
2468
emit_f64x2_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)2469 void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst,
2470 LiftoffRegister lhs,
2471 uint8_t imm_lane_idx) {
2472 ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx);
2473 }
2474
emit_f64x2_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)2475 void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst,
2476 LiftoffRegister src1,
2477 LiftoffRegister src2,
2478 uint8_t imm_lane_idx) {
2479 ReplaceLane(liftoff::GetSimd128Register(dst),
2480 liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx);
2481 }
2482
emit_f64x2_abs(LiftoffRegister dst,LiftoffRegister src)2483 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
2484 LiftoffRegister src) {
2485 vabs(dst.low_fp(), src.low_fp());
2486 vabs(dst.high_fp(), src.high_fp());
2487 }
2488
emit_f64x2_neg(LiftoffRegister dst,LiftoffRegister src)2489 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
2490 LiftoffRegister src) {
2491 vneg(dst.low_fp(), src.low_fp());
2492 vneg(dst.high_fp(), src.high_fp());
2493 }
2494
emit_f64x2_sqrt(LiftoffRegister dst,LiftoffRegister src)2495 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
2496 LiftoffRegister src) {
2497 vsqrt(dst.low_fp(), src.low_fp());
2498 vsqrt(dst.high_fp(), src.high_fp());
2499 }
2500
emit_f64x2_ceil(LiftoffRegister dst,LiftoffRegister src)2501 bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst,
2502 LiftoffRegister src) {
2503 if (!CpuFeatures::IsSupported(ARMv8)) {
2504 return false;
2505 }
2506
2507 CpuFeatureScope scope(this, ARMv8);
2508 vrintp(dst.low_fp(), src.low_fp());
2509 vrintp(dst.high_fp(), src.high_fp());
2510 return true;
2511 }
2512
emit_f64x2_floor(LiftoffRegister dst,LiftoffRegister src)2513 bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst,
2514 LiftoffRegister src) {
2515 if (!CpuFeatures::IsSupported(ARMv8)) {
2516 return false;
2517 }
2518
2519 CpuFeatureScope scope(this, ARMv8);
2520 vrintm(dst.low_fp(), src.low_fp());
2521 vrintm(dst.high_fp(), src.high_fp());
2522 return true;
2523 }
2524
emit_f64x2_trunc(LiftoffRegister dst,LiftoffRegister src)2525 bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst,
2526 LiftoffRegister src) {
2527 if (!CpuFeatures::IsSupported(ARMv8)) {
2528 return false;
2529 }
2530
2531 CpuFeatureScope scope(this, ARMv8);
2532 vrintz(dst.low_fp(), src.low_fp());
2533 vrintz(dst.high_fp(), src.high_fp());
2534 return true;
2535 }
2536
emit_f64x2_nearest_int(LiftoffRegister dst,LiftoffRegister src)2537 bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst,
2538 LiftoffRegister src) {
2539 if (!CpuFeatures::IsSupported(ARMv8)) {
2540 return false;
2541 }
2542
2543 CpuFeatureScope scope(this, ARMv8);
2544 vrintn(dst.low_fp(), src.low_fp());
2545 vrintn(dst.high_fp(), src.high_fp());
2546 return true;
2547 }
2548
emit_f64x2_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2549 void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
2550 LiftoffRegister rhs) {
2551 vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2552 vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2553 }
2554
emit_f64x2_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2555 void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
2556 LiftoffRegister rhs) {
2557 vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2558 vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2559 }
2560
emit_f64x2_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2561 void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
2562 LiftoffRegister rhs) {
2563 vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2564 vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2565 }
2566
emit_f64x2_div(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2567 void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs,
2568 LiftoffRegister rhs) {
2569 vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp());
2570 vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp());
2571 }
2572
emit_f64x2_min(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2573 void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
2574 LiftoffRegister rhs) {
2575 Simd128Register dest = liftoff::GetSimd128Register(dst);
2576 Simd128Register left = liftoff::GetSimd128Register(lhs);
2577 Simd128Register right = liftoff::GetSimd128Register(rhs);
2578
2579 liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2580 liftoff::MinOrMax::kMin);
2581 liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2582 liftoff::MinOrMax::kMin);
2583 }
2584
emit_f64x2_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2585 void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
2586 LiftoffRegister rhs) {
2587 Simd128Register dest = liftoff::GetSimd128Register(dst);
2588 Simd128Register left = liftoff::GetSimd128Register(lhs);
2589 Simd128Register right = liftoff::GetSimd128Register(rhs);
2590
2591 liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(),
2592 liftoff::MinOrMax::kMax);
2593 liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(),
2594 liftoff::MinOrMax::kMax);
2595 }
2596
emit_f64x2_pmin(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2597 void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
2598 LiftoffRegister rhs) {
2599 QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2600 QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2601 QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2602
2603 if (dst != rhs) {
2604 vmov(dest, left);
2605 }
2606
2607 VFPCompareAndSetFlags(right.low(), left.low());
2608 vmov(dest.low(), right.low(), mi);
2609 VFPCompareAndSetFlags(right.high(), left.high());
2610 vmov(dest.high(), right.high(), mi);
2611 }
2612
emit_f64x2_pmax(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2613 void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs,
2614 LiftoffRegister rhs) {
2615 QwNeonRegister dest = liftoff::GetSimd128Register(dst);
2616 QwNeonRegister left = liftoff::GetSimd128Register(lhs);
2617 QwNeonRegister right = liftoff::GetSimd128Register(rhs);
2618
2619 if (dst != rhs) {
2620 vmov(dest, left);
2621 }
2622
2623 VFPCompareAndSetFlags(right.low(), left.low());
2624 vmov(dest.low(), right.low(), gt);
2625 VFPCompareAndSetFlags(right.high(), left.high());
2626 vmov(dest.high(), right.high(), gt);
2627 }
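// pmin/pmax above are the wasm "pseudo" min/max: pmin(a, b) = b < a ? b : a
// and pmax(a, b) = a < b ? b : a, with no special NaN or -0 handling, which
// is why a plain lane-wise compare plus conditional vmov is sufficient.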
2628
emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,LiftoffRegister src)2629 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_s(LiftoffRegister dst,
2630 LiftoffRegister src) {
2631 F64x2ConvertLowI32x4S(liftoff::GetSimd128Register(dst),
2632 liftoff::GetSimd128Register(src));
2633 }
2634
emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,LiftoffRegister src)2635 void LiftoffAssembler::emit_f64x2_convert_low_i32x4_u(LiftoffRegister dst,
2636 LiftoffRegister src) {
2637 F64x2ConvertLowI32x4U(liftoff::GetSimd128Register(dst),
2638 liftoff::GetSimd128Register(src));
2639 }
2640
emit_f64x2_promote_low_f32x4(LiftoffRegister dst,LiftoffRegister src)2641 void LiftoffAssembler::emit_f64x2_promote_low_f32x4(LiftoffRegister dst,
2642 LiftoffRegister src) {
2643 F64x2PromoteLowF32x4(liftoff::GetSimd128Register(dst),
2644 liftoff::GetSimd128Register(src));
2645 }
2646
emit_f32x4_splat(LiftoffRegister dst,LiftoffRegister src)2647 void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst,
2648 LiftoffRegister src) {
2649 vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0);
2650 }
2651
emit_f32x4_extract_lane(LiftoffRegister dst,LiftoffRegister lhs,uint8_t imm_lane_idx)2652 void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst,
2653 LiftoffRegister lhs,
2654 uint8_t imm_lane_idx) {
2655 ExtractLane(liftoff::GetFloatRegister(dst.fp()),
2656 liftoff::GetSimd128Register(lhs), imm_lane_idx);
2657 }
2658
emit_f32x4_replace_lane(LiftoffRegister dst,LiftoffRegister src1,LiftoffRegister src2,uint8_t imm_lane_idx)2659 void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst,
2660 LiftoffRegister src1,
2661 LiftoffRegister src2,
2662 uint8_t imm_lane_idx) {
2663 ReplaceLane(liftoff::GetSimd128Register(dst),
2664 liftoff::GetSimd128Register(src1),
2665 liftoff::GetFloatRegister(src2.fp()), imm_lane_idx);
2666 }
2667
emit_f32x4_abs(LiftoffRegister dst,LiftoffRegister src)2668 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
2669 LiftoffRegister src) {
2670 vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2671 }
2672
emit_f32x4_neg(LiftoffRegister dst,LiftoffRegister src)2673 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
2674 LiftoffRegister src) {
2675 vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
2676 }
2677
emit_f32x4_sqrt(LiftoffRegister dst,LiftoffRegister src)2678 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
2679 LiftoffRegister src) {
2680 // Only d0 to d15 are available to us here, and each of these d registers
2681 // aliases two s registers.
2682 LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2683 LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code());
2684
2685 LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2686 LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code());
2687
2688 vsqrt(dst_low.low(), src_low.low());
2689 vsqrt(dst_low.high(), src_low.high());
2690 vsqrt(dst_high.low(), src_high.low());
2691 vsqrt(dst_high.high(), src_high.high());
2692 }
2693
emit_f32x4_ceil(LiftoffRegister dst,LiftoffRegister src)2694 bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst,
2695 LiftoffRegister src) {
2696 if (!CpuFeatures::IsSupported(ARMv8)) {
2697 return false;
2698 }
2699
2700 CpuFeatureScope scope(this, ARMv8);
2701 vrintp(NeonS32, liftoff::GetSimd128Register(dst),
2702 liftoff::GetSimd128Register(src));
2703 return true;
2704 }
2705
emit_f32x4_floor(LiftoffRegister dst,LiftoffRegister src)2706 bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst,
2707 LiftoffRegister src) {
2708 if (!CpuFeatures::IsSupported(ARMv8)) {
2709 return false;
2710 }
2711
2712 CpuFeatureScope scope(this, ARMv8);
2713 vrintm(NeonS32, liftoff::GetSimd128Register(dst),
2714 liftoff::GetSimd128Register(src));
2715 return true;
2716 }
2717
emit_f32x4_trunc(LiftoffRegister dst,LiftoffRegister src)2718 bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst,
2719 LiftoffRegister src) {
2720 if (!CpuFeatures::IsSupported(ARMv8)) {
2721 return false;
2722 }
2723
2724 CpuFeatureScope scope(this, ARMv8);
2725 vrintz(NeonS32, liftoff::GetSimd128Register(dst),
2726 liftoff::GetSimd128Register(src));
2727 return true;
2728 }
2729
emit_f32x4_nearest_int(LiftoffRegister dst,LiftoffRegister src)2730 bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst,
2731 LiftoffRegister src) {
2732 if (!CpuFeatures::IsSupported(ARMv8)) {
2733 return false;
2734 }
2735
2736 CpuFeatureScope scope(this, ARMv8);
2737 vrintn(NeonS32, liftoff::GetSimd128Register(dst),
2738 liftoff::GetSimd128Register(src));
2739 return true;
2740 }
2741
emit_f32x4_add(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2742 void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
2743 LiftoffRegister rhs) {
2744 vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2745 liftoff::GetSimd128Register(rhs));
2746 }
2747
emit_f32x4_sub(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2748 void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
2749 LiftoffRegister rhs) {
2750 vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2751 liftoff::GetSimd128Register(rhs));
2752 }
2753
emit_f32x4_mul(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2754 void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
2755 LiftoffRegister rhs) {
2756 vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2757 liftoff::GetSimd128Register(rhs));
2758 }
2759
emit_f32x4_div(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2760 void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs,
2761 LiftoffRegister rhs) {
2762 // Only d0 to d15 are available to us here, and each of these d registers
2763 // aliases two s registers.
2764 LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code());
2765 LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code());
2766 LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code());
2767
2768 LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code());
2769 LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code());
2770 LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code());
2771
2772 vdiv(dst_low.low(), lhs_low.low(), rhs_low.low());
2773 vdiv(dst_low.high(), lhs_low.high(), rhs_low.high());
2774 vdiv(dst_high.low(), lhs_high.low(), rhs_high.low());
2775 vdiv(dst_high.high(), lhs_high.high(), rhs_high.high());
2776 }
2777
emit_f32x4_min(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2778 void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
2779 LiftoffRegister rhs) {
2780 vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2781 liftoff::GetSimd128Register(rhs));
2782 }
2783
emit_f32x4_max(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)2784 void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
2785 LiftoffRegister rhs) {
2786 vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
2787 liftoff::GetSimd128Register(rhs));
2788 }
2789
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
  if (dst == lhs || dst == rhs) {
    tmp = temps.AcquireQ();
  }

  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
  vcgt(tmp, left, right);
  vbsl(tmp, right, left);

  if (dst == lhs || dst == rhs) {
    vmov(liftoff::GetSimd128Register(dst), tmp);
  }
}

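// Pseudo-maximum: f32x4.pmax(a, b) is a < b ? b : a per lane; same pattern as
// pmin above with the compare operands swapped.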
void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  QwNeonRegister tmp = liftoff::GetSimd128Register(dst);
  if (dst == lhs || dst == rhs) {
    tmp = temps.AcquireQ();
  }

  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
  vcgt(tmp, right, left);
  vbsl(tmp, right, left);

  if (dst == lhs || dst == rhs) {
    vmov(liftoff::GetSimd128Register(dst), tmp);
  }
}

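// The 64-bit value arrives in a GP register pair, so splat the low word into
// all four 32-bit lanes and then patch the two odd lanes with the high word.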
void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
  vdup(Neon32, dst_simd, src.low_gp());
  ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1);
  ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3);
}

void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
              imm_lane_idx * 2);
  ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32,
              imm_lane_idx * 2 + 1);
}

void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  Simd128Register dst_simd = liftoff::GetSimd128Register(dst);
  Simd128Register src1_simd = liftoff::GetSimd128Register(src1);
  ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2);
  ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32,
              imm_lane_idx * 2 + 1);
}

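// NEON has no 64-bit vneg, so negate by subtracting from a zeroed register.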
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  QwNeonRegister zero =
      dst == src ? temps.AcquireQ() : liftoff::GetSimd128Register(dst);
  vmov(zero, uint64_t{0});
  vsub(Neon64, liftoff::GetSimd128Register(dst), zero,
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i64x2_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I64x2AllTrue(dst.gp(), liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kLeft, NeonS64, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  vshl(NeonS64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), rhs & 63);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonS64, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS64>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonU64, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU64>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vsub(Neon64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

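// 64x64->64 multiply per lane built from 32-bit pieces: after vtrn, the low D
// register of each temporary holds the low words of both lanes and the high D
// register the high words, so (lo*hi + hi*lo) << 32 plus lo*lo gives the full
// product modulo 2^64.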
void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);

  QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst);
  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);

  // These temporary registers will be modified. We can directly modify lhs and
  // rhs if they are not used, saving on temporaries.
  QwNeonRegister tmp1 = left;
  QwNeonRegister tmp2 = right;

  LiftoffRegList used_plus_dst =
      cache_state()->used_registers | LiftoffRegList{dst};

  if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
    tmp1 = temps.AcquireQ();
    // We only have 1 scratch Q register, so acquire another ourselves.
    LiftoffRegList pinned = {dst};
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    tmp2 = liftoff::GetSimd128Register(unused_pair);
  } else if (used_plus_dst.has(lhs)) {
    tmp1 = temps.AcquireQ();
  } else if (used_plus_dst.has(rhs)) {
    tmp2 = temps.AcquireQ();
  }

  // Algorithm from code-generator-arm.cc, refer to comments there for details.
  if (tmp1 != left) {
    vmov(tmp1, left);
  }
  if (tmp2 != right) {
    vmov(tmp2, right);
  }

  vtrn(Neon32, tmp1.low(), tmp1.high());
  vtrn(Neon32, tmp2.low(), tmp2.high());

  vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high());
  vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low());
  vshl(NeonU64, dst_neon, dst_neon, 32);

  vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low());
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.low_fp(),
        src2.low_fp());
}

void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.low_fp(),
        src2.low_fp());
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonS32, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonU32, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  I64x2BitMask(dst.gp(), liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonS32, liftoff::GetSimd128Register(dst), src.high_fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonU32, liftoff::GetSimd128Register(dst), src.high_fp());
}

void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
}

void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst,
                                               LiftoffRegister lhs,
                                               uint8_t imm_lane_idx) {
  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  ReplaceLane(liftoff::GetSimd128Register(dst),
              liftoff::GetSimd128Register(src1), src2.gp(), NeonS32,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vneg(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

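// All-true reduction: fold the four lanes with unsigned pairwise minima; the
// minimum is non-zero iff every lane is non-zero, and the result is then
// normalized to 0 or 1.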
void LiftoffAssembler::emit_i32x4_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  DwVfpRegister scratch = temps.AcquireD();
  vpmin(NeonU32, scratch, src.low_fp(), src.high_fp());
  vpmin(NeonU32, scratch, scratch, scratch);
  ExtractLane(dst.gp(), scratch, NeonS32, 0);
  cmp(dst.gp(), Operand(0));
  mov(dst.gp(), Operand(1), LeaveCC, ne);
}

void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  Simd128Register tmp = liftoff::GetSimd128Register(src);
  Simd128Register mask = temps.AcquireQ();

  if (cache_state()->is_used(src)) {
    // We only have 1 scratch Q register, so try and reuse src.
    LiftoffRegList pinned = {src};
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    mask = liftoff::GetSimd128Register(unused_pair);
  }

  vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31);
  // The mask has the i-th bit set in lane i. ANDing it with tmp keeps that
  // bit for lanes whose sign bit was set and clears it for the others.
  vmov(mask.low(), base::Double((uint64_t)0x0000'0002'0000'0001));
  vmov(mask.high(), base::Double((uint64_t)0x0000'0008'0000'0004));
  vand(tmp, mask, tmp);
  vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
  vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
  VmovLow(dst.gp(), tmp.low());
}

void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kLeft, NeonS32, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  vshl(NeonS32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), rhs & 31);
}

void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonS32, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS32>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonU32, Neon32>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU32>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vsub(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vmul(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonS32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonU32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonS32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonU32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

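// i32x4.dot_i16x8_s: widen-multiply the low and high i16x4 halves, then add
// adjacent 32-bit products pairwise into the destination halves.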
void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
                                              LiftoffRegister lhs,
                                              LiftoffRegister rhs) {
  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);

  UseScratchRegisterScope temps(this);
  Simd128Register scratch = temps.AcquireQ();

  vmull(NeonS16, scratch, left.low(), right.low());
  vpadd(Neon32, dest.low(), scratch.low(), scratch.high());

  vmull(NeonS16, scratch, left.high(), right.high());
  vpadd(Neon32, dest.high(), scratch.low(), scratch.high());
}

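// Extended pairwise additions map directly onto vpaddl, which adds adjacent
// lane pairs and widens them to the next larger element size.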
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  vpaddl(NeonS16, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  vpaddl(NeonU16, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.low_fp(),
        src2.low_fp());
}

void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.low_fp(),
        src2.low_fp());
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonS16, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonU16, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp());
}

void LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vneg(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i16x8_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  DwVfpRegister scratch = temps.AcquireD();
  vpmin(NeonU16, scratch, src.low_fp(), src.high_fp());
  vpmin(NeonU16, scratch, scratch, scratch);
  vpmin(NeonU16, scratch, scratch, scratch);
  ExtractLane(dst.gp(), scratch, NeonS16, 0);
  cmp(dst.gp(), Operand(0));
  mov(dst.gp(), Operand(1), LeaveCC, ne);
}

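// Same scheme as emit_i32x4_bitmask above, with eight per-lane powers of two
// folded down by three pairwise additions.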
void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  Simd128Register tmp = liftoff::GetSimd128Register(src);
  Simd128Register mask = temps.AcquireQ();

  if (cache_state()->is_used(src)) {
    // We only have 1 scratch Q register, so try and reuse src.
    LiftoffRegList pinned = {src};
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    mask = liftoff::GetSimd128Register(unused_pair);
  }

  vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15);
  // The mask has the i-th bit set in lane i. ANDing it with tmp keeps that
  // bit for lanes whose sign bit was set and clears it for the others.
  vmov(mask.low(), base::Double((uint64_t)0x0008'0004'0002'0001));
  vmov(mask.high(), base::Double((uint64_t)0x0080'0040'0020'0010));
  vand(tmp, mask, tmp);
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
  vmov(NeonU16, dst.gp(), tmp.low(), 0);
}

void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kLeft, NeonS16, Neon16>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  vshl(NeonS16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), rhs & 15);
}

void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonS16, Neon16>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS16>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonU16, Neon16>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU16>(this, dst, lhs,
                                                            rhs);
}

void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqadd(NeonS16, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vsub(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqsub(NeonS16, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqsub(NeonU16, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vmul(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqadd(NeonU16, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonS16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonU16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonS16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonU16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  ReplaceLane(liftoff::GetSimd128Register(dst),
              liftoff::GetSimd128Register(src1), src2.gp(), NeonS16,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  vpaddl(NeonS8, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
                                                          LiftoffRegister src) {
  vpaddl(NeonU8, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
}

void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
                                                     LiftoffRegister src1,
                                                     LiftoffRegister src2) {
  vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.low_fp(), src2.low_fp());
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonS8, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
                                                      LiftoffRegister src1,
                                                      LiftoffRegister src2) {
  vmull(NeonU8, liftoff::GetSimd128Register(dst), src1.high_fp(),
        src2.high_fp());
}

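// vqrdmulh is a rounding, saturating multiply that returns the high half of
// the doubled product, matching the Q15 semantics of i16x8.q15mulr_sat_s.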
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
                                                LiftoffRegister src1,
                                                LiftoffRegister src2) {
  vqrdmulh(NeonS16, liftoff::GetSimd128Register(dst),
           liftoff::GetSimd128Register(src1),
           liftoff::GetSimd128Register(src2));
}

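// Shuffles are lowered to vtbl: the 16 byte indices are materialized in a
// scratch Q register and used to look bytes up in a table formed by the
// source register(s).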
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs,
                                          const uint8_t shuffle[16],
                                          bool is_swizzle) {
  Simd128Register dest = liftoff::GetSimd128Register(dst);
  Simd128Register src1 = liftoff::GetSimd128Register(lhs);
  Simd128Register src2 = liftoff::GetSimd128Register(rhs);
  UseScratchRegisterScope temps(this);
  Simd128Register scratch = temps.AcquireQ();
  if ((src1 != src2) && src1.code() + 1 != src2.code()) {
    // vtbl requires the table operands to be consecutive or the same.
    // If they are the same, we build a smaller list operand below
    // (table_size = 2). If they are different and not consecutive, we move
    // src1 and src2 to q14 and q15, which are unused since they are not
    // allocatable in Liftoff.
    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d28),
                  "This only works if q14-q15 (d28-d31) are not used.");
    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d29),
                  "This only works if q14-q15 (d28-d31) are not used.");
    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d30),
                  "This only works if q14-q15 (d28-d31) are not used.");
    static_assert(!kLiftoffAssemblerFpCacheRegs.has(d31),
                  "This only works if q14-q15 (d28-d31) are not used.");
    vmov(q14, src1);
    src1 = q14;
    vmov(q15, src2);
    src2 = q15;
  }

  int table_size = src1 == src2 ? 2 : 4;

  int scratch_s_base = scratch.code() * 4;
  for (int j = 0; j < 4; j++) {
    uint32_t imm = 0;
    for (int i = 3; i >= 0; i--) {
      imm = (imm << 8) | shuffle[j * 4 + i];
    }
    DCHECK_EQ(0, imm & (table_size == 2 ? 0xF0F0F0F0 : 0xE0E0E0E0));
    // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
    vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm));
  }

  DwVfpRegister table_base = src1.low();
  NeonListOperand table(table_base, table_size);

  if (dest != src1 && dest != src2) {
    vtbl(dest.low(), table, scratch.low());
    vtbl(dest.high(), table, scratch.high());
  } else {
    vtbl(scratch.low(), table, scratch.low());
    vtbl(scratch.high(), table, scratch.high());
    vmov(dest, scratch);
  }
}

void LiftoffAssembler::emit_i8x16_popcnt(LiftoffRegister dst,
                                         LiftoffRegister src) {
  vcnt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,
                                        LiftoffRegister src) {
  vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp());
}

void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 uint8_t imm_lane_idx) {
  ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst,
                                               LiftoffRegister src1,
                                               LiftoffRegister src2,
                                               uint8_t imm_lane_idx) {
  ReplaceLane(liftoff::GetSimd128Register(dst),
              liftoff::GetSimd128Register(src1), src2.gp(), NeonS8,
              imm_lane_idx);
}

void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vneg(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_v128_anytrue(LiftoffRegister dst,
                                         LiftoffRegister src) {
  liftoff::EmitAnyTrue(this, dst, src);
}

void LiftoffAssembler::emit_i8x16_alltrue(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  DwVfpRegister scratch = temps.AcquireD();
  vpmin(NeonU8, scratch, src.low_fp(), src.high_fp());
  vpmin(NeonU8, scratch, scratch, scratch);
  vpmin(NeonU8, scratch, scratch, scratch);
  vpmin(NeonU8, scratch, scratch, scratch);
  ExtractLane(dst.gp(), scratch, NeonS8, 0);
  cmp(dst.gp(), Operand(0));
  mov(dst.gp(), Operand(1), LeaveCC, ne);
}

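// Collects the most significant bit of each of the 16 byte lanes into bits
// 0-15 of dst: each lane is masked with its power of two, and the halves are
// interleaved and added down to a single 16-bit value.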
void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst,
                                          LiftoffRegister src) {
  UseScratchRegisterScope temps(this);
  Simd128Register tmp = liftoff::GetSimd128Register(src);
  Simd128Register mask = temps.AcquireQ();

  if (cache_state()->is_used(src)) {
    // We only have 1 scratch Q register, so try and reuse src.
    LiftoffRegList pinned = {src};
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    mask = liftoff::GetSimd128Register(unused_pair);
  }

  vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7);
  // The mask has the i-th bit set in lane i. ANDing it with tmp keeps that
  // bit for lanes whose sign bit was set and clears it for the others.
  vmov(mask.low(), base::Double((uint64_t)0x8040'2010'0804'0201));
  vmov(mask.high(), base::Double((uint64_t)0x8040'2010'0804'0201));
  vand(tmp, mask, tmp);
  vext(mask, tmp, tmp, 8);
  vzip(Neon8, mask, tmp);
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
  vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
  vmov(NeonU16, dst.gp(), tmp.low(), 0);
}

void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kLeft, NeonS8, Neon8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  vshl(NeonS8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), rhs & 7);
}

void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonS8, Neon8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonS8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift<liftoff::kRight, NeonU8, Neon8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate<liftoff::kRight, NeonU8>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqadd(NeonS8, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vsub(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqsub(NeonS8, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqsub(NeonU8, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst,
                                            LiftoffRegister lhs,
                                            LiftoffRegister rhs) {
  vqadd(NeonU8, liftoff::GetSimd128Register(dst),
        liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonS8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmin(NeonU8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonS8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  vmax(NeonU8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

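// Integer compares produce all-ones (true) or all-zeros (false) lanes; the
// "ne" variants are emitted as an equality compare followed by a bitwise not.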
void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
}

void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonS8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonU8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonS8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonU8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
}

void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonS16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonU16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonS16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonU16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
}

void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonS32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcgt(NeonU32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonS32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  vcge(NeonU32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

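// 32-bit ARM NEON has no 64-bit lane compares, so these go through the
// I64x2* macro-assembler helpers, which synthesize the result from narrower
// operations.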
void LiftoffAssembler::emit_i64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  I64x2Eq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
          liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  I64x2Ne(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
          liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  I64x2GtS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
           liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  I64x2GeS(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
           liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst));
}

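// Less-than and less-or-equal are emitted as greater-than and
// greater-or-equal with the operands swapped.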
void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
       liftoff::GetSimd128Register(lhs));
}

void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs),
       liftoff::GetSimd128Register(lhs));
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::F64x2Compare(this, dst, lhs, rhs, eq);
}

void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::F64x2Compare(this, dst, lhs, rhs, ne);
}

void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::F64x2Compare(this, dst, lhs, rhs, lt);
}

void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  liftoff::F64x2Compare(this, dst, lhs, rhs, le);
}

void LiftoffAssembler::emit_s128_const(LiftoffRegister dst,
                                       const uint8_t imms[16]) {
  uint64_t vals[2];
  memcpy(vals, imms, sizeof(vals));
  vmov(dst.low_fp(), base::Double(vals[0]));
  vmov(dst.high_fp(), base::Double(vals[1]));
}

void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) {
  vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs) {
  veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
}

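// vbsl selects from its two source operands using the bits already in the
// destination, so the mask has to be moved into dst first when it is not
// there already.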
void LiftoffAssembler::emit_s128_select(LiftoffRegister dst,
                                        LiftoffRegister src1,
                                        LiftoffRegister src2,
                                        LiftoffRegister mask) {
  if (dst != mask) {
    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask));
  }
  vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1),
       liftoff::GetSimd128Register(src2));
}

void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  vcvt_s32_f32(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  vcvt_u32_f32(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  vcvt_f32_s32(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister src) {
  vcvt_f32_u32(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(src));
}

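// Demote: convert each f64 lane to f32 into the low half of dst and zero the
// upper half, matching the "_zero" semantics of the instruction.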
void LiftoffAssembler::emit_f32x4_demote_f64x2_zero(LiftoffRegister dst,
                                                    LiftoffRegister src) {
  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
  vcvt_f32_f64(dst_d.low(), src.low_fp());
  vcvt_f32_f64(dst_d.high(), src.high_fp());
  vmov(dst.high_fp(), 0);
}

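// The narrowing conversions saturate each wide lane to the destination lane
// type (signed or unsigned) via the shared S128NarrowOp helper.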
emit_i8x16_sconvert_i16x8(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3936 void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst,
3937 LiftoffRegister lhs,
3938 LiftoffRegister rhs) {
3939 liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs);
3940 }
3941
emit_i8x16_uconvert_i16x8(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3942 void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
3943 LiftoffRegister lhs,
3944 LiftoffRegister rhs) {
3945 liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs);
3946 }
3947
emit_i16x8_sconvert_i32x4(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3948 void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
3949 LiftoffRegister lhs,
3950 LiftoffRegister rhs) {
3951 liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs);
3952 }
3953
emit_i16x8_uconvert_i32x4(LiftoffRegister dst,LiftoffRegister lhs,LiftoffRegister rhs)3954 void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
3955 LiftoffRegister lhs,
3956 LiftoffRegister rhs) {
3957 liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs);
3958 }
3959
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst,
                                                     LiftoffRegister src) {
  vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp());
}

void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst,
                                                      LiftoffRegister src) {
  vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp());
}

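// Truncate each f64 lane to i32 (VFP conversions to integer saturate on
// overflow), pack the two results into the low half of dst, and zero the
// high half.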
void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_s_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
  vcvt_s32_f64(dst_d.low(), src.low_fp());
  vcvt_s32_f64(dst_d.high(), src.high_fp());
  vmov(dst.high_fp(), 0);
}

void LiftoffAssembler::emit_i32x4_trunc_sat_f64x2_u_zero(LiftoffRegister dst,
                                                         LiftoffRegister src) {
  LowDwVfpRegister dst_d = LowDwVfpRegister::from_code(dst.low_fp().code());
  vcvt_u32_f64(dst_d.low(), src.low_fp());
  vcvt_u32_f64(dst_d.high(), src.high_fp());
  vmov(dst.high_fp(), 0);
}

void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
       liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  vrhadd(NeonU8, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  vrhadd(NeonU16, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  I64x2Abs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
}

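// Load the current stack limit through {limit_address}, compare sp against
// it, and branch to the out-of-line stack check if sp is at or below the
// limit.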
void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  ldr(limit_address, MemOperand(limit_address));
  cmp(sp, limit_address);
  b(ool_code, ls);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0, 0);
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  // Asserts unreachable within the wasm code.
  TurboAssembler::AssertUnreachable(reason);
}

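// Push the GP registers with a single stm, then push the FP registers in
// batches: vstm only handles a contiguous, ascending range of D registers
// (at most 16), so consecutive register codes are grouped into one vstm.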
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  RegList core_regs = regs.GetGpList();
  if (!core_regs.is_empty()) {
    stm(db_w, sp, core_regs);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    DoubleRegister first = reg.fp();
    DoubleRegister last = first;
    fp_regs.clear(reg);
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      int code = reg.fp().code();
      // vstm cannot push more than 16 registers. We have to make sure the
      // condition is met.
      if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break;
      last = reg.fp();
      fp_regs.clear(reg);
    }
    vstm(db_w, sp, first, last);
  }
}

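// Mirror of PushRegisters: restore the FP registers first (they were pushed
// last) in contiguous vldm batches, then restore the GP registers with ldm.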
void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetLastRegSet();
    DoubleRegister last = reg.fp();
    DoubleRegister first = last;
    fp_regs.clear(reg);
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetLastRegSet();
      int code = reg.fp().code();
      if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break;
      first = reg.fp();
      fp_regs.clear(reg);
    }
    vldm(ia_w, sp, first, last);
  }
  RegList core_regs = regs.GetGpList();
  if (!core_regs.is_empty()) {
    ldm(ia_w, sp, core_regs);
  }
}

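// Record which of the spilled slots hold tagged (reference) values so the GC
// can visit them, and report the total out-of-line spill space.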
void LiftoffAssembler::RecordSpillsInSafepoint(
    SafepointTableBuilder::Safepoint& safepoint, LiftoffRegList all_spills,
    LiftoffRegList ref_spills, int spill_offset) {
  int spill_space_size = 0;
  while (!all_spills.is_empty()) {
    LiftoffRegister reg = all_spills.GetLastRegSet();
    if (ref_spills.has(reg)) {
      safepoint.DefineTaggedStackSlot(spill_offset);
    }
    all_spills.clear(reg);
    ++spill_offset;
    spill_space_size += kSystemPointerSize;
  }
  // Record the number of additional spill slots.
  RecordOolSpillSpaceSize(spill_space_size);
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  Drop(num_stack_slots);
  Ret();
}

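// C calls go through a stack buffer: every argument is stored into newly
// allocated stack space, r0 is set to point at that buffer, and after the
// call any out-argument is read back from the same buffer.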
void LiftoffAssembler::CallC(const ValueKindSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueKind out_argument_kind, int stack_bytes,
                             ExternalReference ext_ref) {
  // Arguments are passed by pushing them all to the stack and then passing
  // a pointer to them.
  DCHECK(IsAligned(stack_bytes, kSystemPointerSize));
  // Reserve space in the stack.
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueKind param_kind : sig->parameters()) {
    switch (param_kind) {
      case kI32:
        str(args->gp(), MemOperand(sp, arg_bytes));
        break;
      case kI64:
        str(args->low_gp(), MemOperand(sp, arg_bytes));
        str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize));
        break;
      case kF32:
        vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes));
        break;
      case kF64:
        vstr(args->fp(), MemOperand(sp, arg_bytes));
        break;
      case kS128:
        vstr(args->low_fp(), MemOperand(sp, arg_bytes));
        vstr(args->high_fp(),
             MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
        break;
      default:
        UNREACHABLE();
    }
    args++;
    arg_bytes += value_kind_size(param_kind);
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  mov(r0, sp);

  // Now call the C function.
  constexpr int kNumCCallArgs = 1;
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = r0;
    if (kReturnReg != rets->gp()) {
      Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    result_reg++;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_kind != kVoid) {
    switch (out_argument_kind) {
      case kI32:
        ldr(result_reg->gp(), MemOperand(sp));
        break;
      case kI64:
        ldr(result_reg->low_gp(), MemOperand(sp));
        ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize));
        break;
      case kF32:
        vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp));
        break;
      case kF64:
        vldr(result_reg->fp(), MemOperand(sp));
        break;
      case kS128:
        vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
             NeonMemOperand(sp));
        break;
      default:
        UNREACHABLE();
    }
  }
  add(sp, sp, Operand(stack_bytes));
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  Call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  Jump(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const ValueKindSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  DCHECK(target != no_reg);
  Call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  DCHECK(target != no_reg);
  Jump(target);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}

void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  mov(addr, sp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  add(sp, sp, Operand(size));
}

void LiftoffAssembler::MaybeOSR() {}

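// Detect NaN by comparing src with itself: the comparison is unordered only
// for NaN, and the conditional store below then writes a non-zero value (the
// address in dst) to the slot that dst points to.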
void LiftoffAssembler::emit_set_if_nan(Register dst, DoubleRegister src,
                                       ValueKind kind) {
  if (kind == kF32) {
    FloatRegister src_f = liftoff::GetFloatRegister(src);
    VFPCompareAndSetFlags(src_f, src_f);
  } else {
    DCHECK_EQ(kind, kF64);
    VFPCompareAndSetFlags(src, src);
  }

  // Store a non-zero value if src is NaN.
  str(dst, MemOperand(dst), ne);  // x != x iff isnan(x)
}

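// Reduce the f32 (or f64) lanes of src by addition; a NaN in any lane
// propagates into the scalar sum, which is then checked by emit_set_if_nan.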
void LiftoffAssembler::emit_s128_set_if_nan(Register dst, LiftoffRegister src,
                                            Register tmp_gp,
                                            LiftoffRegister tmp_s128,
                                            ValueKind lane_kind) {
  QwNeonRegister src_q = liftoff::GetSimd128Register(src);
  QwNeonRegister tmp_q = liftoff::GetSimd128Register(tmp_s128);
  if (lane_kind == kF32) {
    vpadd(tmp_q.low(), src_q.low(), src_q.high());
    LowDwVfpRegister tmp_d =
        LowDwVfpRegister::from_code(tmp_s128.low_fp().code());
    vadd(tmp_d.low(), tmp_d.low(), tmp_d.high());
  } else {
    DCHECK_EQ(lane_kind, kF64);
    vadd(tmp_q.low(), src_q.low(), src_q.high());
  }
  emit_set_if_nan(dst, tmp_q.low(), lane_kind);
}

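// Push all parameter slots of a call in stack order (highest slot first).
// For each slot, AllocateStackSpace first skips any padding between the
// previous slot and this one, then the value is pushed from its stack slot,
// register, or constant.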
void LiftoffStackSlots::Construct(int param_slots) {
  DCHECK_LT(0, slots_.size());
  SortInPushOrder();
  int last_stack_slot = param_slots;
  for (auto& slot : slots_) {
    const int stack_slot = slot.dst_slot_;
    int stack_decrement = (last_stack_slot - stack_slot) * kSystemPointerSize;
    DCHECK_LT(0, stack_decrement);
    last_stack_slot = stack_slot;
    const LiftoffAssembler::VarState& src = slot.src_;
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack: {
        switch (src.kind()) {
          // i32 and i64 can be treated as similar cases, i64 being previously
          // split into two i32 registers
          case kI32:
          case kI64:
          case kF32:
          case kRef:
          case kOptRef: {
            asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
            UseScratchRegisterScope temps(asm_);
            Register scratch = temps.Acquire();
            asm_->ldr(scratch,
                      liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
            asm_->Push(scratch);
          } break;
          case kF64: {
            asm_->AllocateStackSpace(stack_decrement - kDoubleSize);
            UseScratchRegisterScope temps(asm_);
            DwVfpRegister scratch = temps.AcquireD();
            asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
            asm_->vpush(scratch);
          } break;
          case kS128: {
            asm_->AllocateStackSpace(stack_decrement - kSimd128Size);
            MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
            UseScratchRegisterScope temps(asm_);
            Register addr = liftoff::CalculateActualAddress(
                asm_, &temps, mem_op.rn(), no_reg, mem_op.offset());
            QwNeonRegister scratch = temps.AcquireQ();
            asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr));
            asm_->vpush(scratch);
            break;
          }
          default:
            UNREACHABLE();
        }
        break;
      }
      case LiftoffAssembler::VarState::kRegister: {
        int pushed_bytes = SlotSizeInBytes(slot);
        asm_->AllocateStackSpace(stack_decrement - pushed_bytes);
        switch (src.kind()) {
          case kI64: {
            LiftoffRegister reg =
                slot.half_ == kLowWord ? src.reg().low() : src.reg().high();
            asm_->push(reg.gp());
          } break;
          case kI32:
          case kRef:
          case kOptRef:
            asm_->push(src.reg().gp());
            break;
          case kF32:
            asm_->vpush(liftoff::GetFloatRegister(src.reg().fp()));
            break;
          case kF64:
            asm_->vpush(src.reg().fp());
            break;
          case kS128:
            asm_->vpush(liftoff::GetSimd128Register(src.reg()));
            break;
          default:
            UNREACHABLE();
        }
        break;
      }
      case LiftoffAssembler::VarState::kIntConst: {
        asm_->AllocateStackSpace(stack_decrement - kSystemPointerSize);
        DCHECK(src.kind() == kI32 || src.kind() == kI64);
        UseScratchRegisterScope temps(asm_);
        Register scratch = temps.Acquire();
        // The high word is the sign extension of the low word.
        asm_->mov(scratch,
                  Operand(slot.half_ == kLowWord ? src.i32_const()
                                                 : src.i32_const() >> 31));
        asm_->push(scratch);
        break;
      }
    }
  }
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_