// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_
#define V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_

#include "src/heap/memory-chunk.h"
#include "src/wasm/baseline/liftoff-assembler.h"
#include "src/wasm/baseline/liftoff-register.h"

namespace v8 {
namespace internal {
namespace wasm {

namespace liftoff {

//  half
//  slot        Frame
//  -----+--------------------+---------------------------
//  n+3  |    parameter n     |
//  ...  |       ...          |
//   4   |    parameter 1     | or parameter 2
//   3   |    parameter 0     | or parameter 1
//   2   |  (result address)  | or parameter 0
//  -----+--------------------+---------------------------
//   1   | return addr (lr)   |
//   0   | previous frame (fp)|
//  -----+--------------------+  <-- frame ptr (fp)
//  -1   | 0xa: WASM          |
//  -2   |     instance       |
//  -----+--------------------+---------------------------
//  -3   |    slot 0 (high)   |   ^
//  -4   |    slot 0 (low)    |   |
//  -5   |    slot 1 (high)   | Frame slots
//  -6   |    slot 1 (low)    |   |
//       |                    |   v
//  -----+--------------------+  <-- stack ptr (sp)
//
static_assert(2 * kSystemPointerSize == LiftoffAssembler::kStackSlotSize,
              "Slot size should be twice the size of the 32 bit pointer.");
constexpr int kInstanceOffset = 2 * kSystemPointerSize;
// kPatchInstructionsRequired sets an upper bound on how many instructions
// PatchPrepareStackFrame may use to grow the stack appropriately. Three
// instructions are required to sub a large constant: movw + movt + sub.
constexpr int32_t kPatchInstructionsRequired = 3;
constexpr int kHalfStackSlotSize = LiftoffAssembler::kStackSlotSize >> 1;

inline MemOperand GetStackSlot(int offset) { return MemOperand(fp, -offset); }

inline MemOperand GetHalfStackSlot(int offset, RegPairHalf half) {
  int32_t half_offset =
      half == kLowWord ? 0 : LiftoffAssembler::kStackSlotSize / 2;
  return MemOperand(offset > 0 ? fp : sp, -offset + half_offset);
}

inline MemOperand GetInstanceOperand() {
  return GetStackSlot(kInstanceOffset);
}

inline MemOperand GetMemOp(LiftoffAssembler* assm,
                           UseScratchRegisterScope* temps, Register addr,
                           Register offset, int32_t offset_imm) {
  if (offset != no_reg) {
    if (offset_imm == 0) return MemOperand(addr, offset);
    Register tmp = temps->Acquire();
    assm->add(tmp, offset, Operand(offset_imm));
    return MemOperand(addr, tmp);
  }
  return MemOperand(addr, offset_imm);
}

inline Register CalculateActualAddress(LiftoffAssembler* assm,
                                       UseScratchRegisterScope* temps,
                                       Register addr_reg, Register offset_reg,
                                       int32_t offset_imm,
                                       Register result_reg = no_reg) {
  if (offset_reg == no_reg && offset_imm == 0) {
    if (result_reg == no_reg) {
      return addr_reg;
    } else {
      assm->mov(result_reg, addr_reg);
      return result_reg;
    }
  }
  Register actual_addr_reg = result_reg != no_reg ?
      result_reg : temps->Acquire();
  if (offset_reg == no_reg) {
    assm->add(actual_addr_reg, addr_reg, Operand(offset_imm));
  } else {
    assm->add(actual_addr_reg, addr_reg, Operand(offset_reg));
    if (offset_imm != 0) {
      assm->add(actual_addr_reg, actual_addr_reg, Operand(offset_imm));
    }
  }
  return actual_addr_reg;
}

inline Condition MakeUnsigned(Condition cond) {
  switch (cond) {
    case kSignedLessThan:
      return kUnsignedLessThan;
    case kSignedLessEqual:
      return kUnsignedLessEqual;
    case kSignedGreaterThan:
      return kUnsignedGreaterThan;
    case kSignedGreaterEqual:
      return kUnsignedGreaterEqual;
    case kEqual:
    case kUnequal:
    case kUnsignedLessThan:
    case kUnsignedLessEqual:
    case kUnsignedGreaterThan:
    case kUnsignedGreaterEqual:
      return cond;
    default:
      UNREACHABLE();
  }
}

template <void (Assembler::*op)(Register, Register, const Operand&, SBit,
                                Condition),
          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
                                           SBit, Condition)>
inline void I64Binop(LiftoffAssembler* assm, LiftoffRegister dst,
                     LiftoffRegister lhs, LiftoffRegister rhs) {
  Register dst_low = dst.low_gp();
  if (dst_low == lhs.high_gp() || dst_low == rhs.high_gp()) {
    dst_low =
        assm->GetUnusedRegister(
                kGpReg, LiftoffRegList::ForRegs(lhs, rhs, dst.high_gp()))
            .gp();
  }
  (assm->*op)(dst_low, lhs.low_gp(), rhs.low_gp(), SetCC, al);
  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(rhs.high_gp()),
                         LeaveCC, al);
  if (dst_low != dst.low_gp()) assm->mov(dst.low_gp(), dst_low);
}

template <void (Assembler::*op)(Register, Register, const Operand&, SBit,
                                Condition),
          void (Assembler::*op_with_carry)(Register, Register, const Operand&,
                                           SBit, Condition)>
inline void I64BinopI(LiftoffAssembler* assm, LiftoffRegister dst,
                      LiftoffRegister lhs, int32_t imm) {
  // The compiler allocated registers such that either {dst == lhs} or there is
  // no overlap between the two.
  DCHECK_NE(dst.low_gp(), lhs.high_gp());
  (assm->*op)(dst.low_gp(), lhs.low_gp(), Operand(imm), SetCC, al);
  // The top half of the immediate is its sign extension, either 0 or -1.
  int32_t sign_extend = imm < 0 ? -1 : 0;
  (assm->*op_with_carry)(dst.high_gp(), lhs.high_gp(), Operand(sign_extend),
                         LeaveCC, al);
}

template <void (TurboAssembler::*op)(Register, Register, Register, Register,
                                     Register),
          bool is_left_shift>
inline void I64Shiftop(LiftoffAssembler* assm, LiftoffRegister dst,
                       LiftoffRegister src, Register amount) {
  Register src_low = src.low_gp();
  Register src_high = src.high_gp();
  Register dst_low = dst.low_gp();
  Register dst_high = dst.high_gp();
  // Left shift writes {dst_high} then {dst_low}, right shifts write {dst_low}
  // then {dst_high}.
  Register clobbered_dst_reg = is_left_shift ? dst_high : dst_low;
  LiftoffRegList pinned = LiftoffRegList::ForRegs(clobbered_dst_reg, src);
  Register amount_capped =
      pinned.set(assm->GetUnusedRegister(kGpReg, pinned)).gp();
  assm->and_(amount_capped, amount, Operand(0x3F));
  // Ensure that writing the first half of {dst} does not overwrite the still
  // needed half of {src}.
  Register* later_src_reg = is_left_shift ?
      &src_low : &src_high;
  if (*later_src_reg == clobbered_dst_reg) {
    *later_src_reg = assm->GetUnusedRegister(kGpReg, pinned).gp();
    assm->TurboAssembler::Move(*later_src_reg, clobbered_dst_reg);
  }

  (assm->*op)(dst_low, dst_high, src_low, src_high, amount_capped);
}

inline FloatRegister GetFloatRegister(DoubleRegister reg) {
  DCHECK_LT(reg.code(), kDoubleCode_d16);
  return LowDwVfpRegister::from_code(reg.code()).low();
}

inline Simd128Register GetSimd128Register(DoubleRegister reg) {
  return QwNeonRegister::from_code(reg.code() / 2);
}

inline Simd128Register GetSimd128Register(LiftoffRegister reg) {
  return liftoff::GetSimd128Register(reg.low_fp());
}

enum class MinOrMax : uint8_t { kMin, kMax };
template <typename RegisterType>
inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst,
                              RegisterType lhs, RegisterType rhs,
                              MinOrMax min_or_max) {
  DCHECK(RegisterType::kSizeInBytes == 4 || RegisterType::kSizeInBytes == 8);
  if (lhs == rhs) {
    assm->TurboAssembler::Move(dst, lhs);
    return;
  }
  Label done, is_nan;
  if (min_or_max == MinOrMax::kMin) {
    assm->TurboAssembler::FloatMin(dst, lhs, rhs, &is_nan);
  } else {
    assm->TurboAssembler::FloatMax(dst, lhs, rhs, &is_nan);
  }
  assm->b(&done);
  assm->bind(&is_nan);
  // Create a NaN output.
  assm->vadd(dst, lhs, rhs);
  assm->bind(&done);
}

inline Register EnsureNoAlias(Assembler* assm, Register reg,
                              Register must_not_alias,
                              UseScratchRegisterScope* temps) {
  if (reg != must_not_alias) return reg;
  Register tmp = temps->Acquire();
  DCHECK_NE(reg, tmp);
  assm->mov(tmp, reg);
  return tmp;
}

inline void S128NarrowOp(LiftoffAssembler* assm, NeonDataType dt,
                         NeonDataType sdt, LiftoffRegister dst,
                         LiftoffRegister lhs, LiftoffRegister rhs) {
  if (dst == lhs) {
    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
  } else {
    assm->vqmovn(dt, sdt, dst.high_fp(), liftoff::GetSimd128Register(rhs));
    assm->vqmovn(dt, sdt, dst.low_fp(), liftoff::GetSimd128Register(lhs));
  }
}

inline void F64x2Compare(LiftoffAssembler* assm, LiftoffRegister dst,
                         LiftoffRegister lhs, LiftoffRegister rhs,
                         Condition cond) {
  DCHECK(cond == eq || cond == ne || cond == lt || cond == le);

  QwNeonRegister dest = liftoff::GetSimd128Register(dst);
  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);
  UseScratchRegisterScope temps(assm);
  Register scratch = temps.Acquire();

  assm->mov(scratch, Operand(0));
  assm->VFPCompareAndSetFlags(left.low(), right.low());
  assm->mov(scratch, Operand(-1), LeaveCC, cond);
  if (cond == lt || cond == le) {
    // Check for NaN.
    assm->mov(scratch, Operand(0), LeaveCC, vs);
  }
  assm->vmov(dest.low(), scratch, scratch);

  assm->mov(scratch, Operand(0));
  assm->VFPCompareAndSetFlags(left.high(), right.high());
  assm->mov(scratch, Operand(-1), LeaveCC, cond);
  if (cond == lt || cond == le) {
    // Check for NaN.
    assm->mov(scratch, Operand(0), LeaveCC, vs);
  }
  assm->vmov(dest.high(), scratch, scratch);
}

inline void Store(LiftoffAssembler* assm, LiftoffRegister src, MemOperand dst,
                  ValueType type) {
#ifdef DEBUG
  // The {str} instruction needs a temp register when the immediate in the
  // provided MemOperand does not fit into 12 bits. This happens for large
  // stack frames. This DCHECK checks that the temp register is available when
  // needed.
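  // (An ARM addressing-mode-2 immediate covers offsets of 0..4095 bytes, so
  // e.g. a slot at offset 4096 needs its offset materialized in a scratch
  // register first.)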
  DCHECK(UseScratchRegisterScope{assm}.CanAcquire());
#endif
  switch (type.kind()) {
    case ValueType::kI32:
    case ValueType::kOptRef:
    case ValueType::kRef:
      assm->str(src.gp(), dst);
      break;
    case ValueType::kI64:
      // Positive offsets should be lowered to kI32.
      assm->str(src.low_gp(), MemOperand(dst.rn(), dst.offset()));
      assm->str(
          src.high_gp(),
          MemOperand(dst.rn(), dst.offset() + liftoff::kHalfStackSlotSize));
      break;
    case ValueType::kF32:
      assm->vstr(liftoff::GetFloatRegister(src.fp()), dst);
      break;
    case ValueType::kF64:
      assm->vstr(src.fp(), dst);
      break;
    case ValueType::kS128: {
      UseScratchRegisterScope temps(assm);
      Register addr = liftoff::CalculateActualAddress(assm, &temps, dst.rn(),
                                                      no_reg, dst.offset());
      assm->vst1(Neon8, NeonListOperand(src.low_fp(), 2), NeonMemOperand(addr));
      break;
    }
    default:
      UNREACHABLE();
  }
}

inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
                 ValueType type) {
  switch (type.kind()) {
    case ValueType::kI32:
    case ValueType::kOptRef:
    case ValueType::kRef:
      assm->ldr(dst.gp(), src);
      break;
    case ValueType::kI64:
      assm->ldr(dst.low_gp(), MemOperand(src.rn(), src.offset()));
      assm->ldr(
          dst.high_gp(),
          MemOperand(src.rn(), src.offset() + liftoff::kHalfStackSlotSize));
      break;
    case ValueType::kF32:
      assm->vldr(liftoff::GetFloatRegister(dst.fp()), src);
      break;
    case ValueType::kF64:
      assm->vldr(dst.fp(), src);
      break;
    case ValueType::kS128: {
      // Get memory address of slot to fill from.
      UseScratchRegisterScope temps(assm);
      Register addr = liftoff::CalculateActualAddress(assm, &temps, src.rn(),
                                                      no_reg, src.offset());
      assm->vld1(Neon8, NeonListOperand(dst.low_fp(), 2), NeonMemOperand(addr));
      break;
    }
    default:
      UNREACHABLE();
  }
}

constexpr int MaskFromNeonDataType(NeonDataType dt) {
  switch (dt) {
    case NeonS8:
    case NeonU8:
      return 7;
    case NeonS16:
    case NeonU16:
      return 15;
    case NeonS32:
    case NeonU32:
      return 31;
    case NeonS64:
    case NeonU64:
      return 63;
  }
}

enum ShiftDirection { kLeft, kRight };

template <ShiftDirection dir, NeonDataType dt, NeonSize sz>
inline void EmitSimdShift(LiftoffAssembler* assm, LiftoffRegister dst,
                          LiftoffRegister lhs, LiftoffRegister rhs) {
  constexpr int mask = MaskFromNeonDataType(dt);
  UseScratchRegisterScope temps(assm);
  QwNeonRegister tmp = temps.AcquireQ();
  Register shift = temps.Acquire();
  assm->and_(shift, rhs.gp(), Operand(mask));
  assm->vdup(sz, tmp, shift);
  if (dir == kRight) {
    assm->vneg(sz, tmp, tmp);
  }
  assm->vshl(dt, liftoff::GetSimd128Register(dst),
             liftoff::GetSimd128Register(lhs), tmp);
}

template <ShiftDirection dir, NeonDataType dt>
inline void EmitSimdShiftImmediate(LiftoffAssembler* assm, LiftoffRegister dst,
                                   LiftoffRegister lhs, int32_t rhs) {
  // vshr by 0 is not allowed, so check for it, and only move if dst != lhs.
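  // Wasm defines shift counts modulo the lane width, hence the masking below;
  // e.g. for 32-bit lanes only the low five bits of {rhs} are significant.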
  int32_t shift = rhs & MaskFromNeonDataType(dt);
  if (shift) {
    if (dir == kLeft) {
      assm->vshl(dt, liftoff::GetSimd128Register(dst),
                 liftoff::GetSimd128Register(lhs), shift);
    } else {
      assm->vshr(dt, liftoff::GetSimd128Register(dst),
                 liftoff::GetSimd128Register(lhs), shift);
    }
  } else if (dst != lhs) {
    assm->vmov(liftoff::GetSimd128Register(dst),
               liftoff::GetSimd128Register(lhs));
  }
}

inline void EmitAnyTrue(LiftoffAssembler* assm, LiftoffRegister dst,
                        LiftoffRegister src) {
  UseScratchRegisterScope temps(assm);
  DwVfpRegister scratch = temps.AcquireD();
  assm->vpmax(NeonU32, scratch, src.low_fp(), src.high_fp());
  assm->vpmax(NeonU32, scratch, scratch, scratch);
  assm->ExtractLane(dst.gp(), scratch, NeonS32, 0);
  assm->cmp(dst.gp(), Operand(0));
  assm->mov(dst.gp(), Operand(1), LeaveCC, ne);
}

}  // namespace liftoff

int LiftoffAssembler::PrepareStackFrame() {
  if (!CpuFeatures::IsSupported(ARMv7)) {
    bailout(kUnsupportedArchitecture, "Armv6 not supported");
    return 0;
  }
  uint32_t offset = static_cast<uint32_t>(pc_offset());
  // PatchPrepareStackFrame will patch this in order to increase the stack
  // appropriately. Additional nops are required as the bytes operand might
  // require extra moves to encode.
  for (int i = 0; i < liftoff::kPatchInstructionsRequired; i++) {
    nop();
  }
  DCHECK_EQ(offset + liftoff::kPatchInstructionsRequired * kInstrSize,
            pc_offset());
  return offset;
}

void LiftoffAssembler::PrepareTailCall(int num_callee_stack_params,
                                       int stack_param_delta) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();

  // Push the return address and frame pointer to complete the stack frame.
  sub(sp, sp, Operand(8));
  ldr(scratch, MemOperand(fp, 4));
  str(scratch, MemOperand(sp, 4));
  ldr(scratch, MemOperand(fp, 0));
  str(scratch, MemOperand(sp, 0));

  // Shift the whole frame upwards.
  int slot_count = num_callee_stack_params + 2;
  for (int i = slot_count - 1; i >= 0; --i) {
    ldr(scratch, MemOperand(sp, i * 4));
    str(scratch, MemOperand(fp, (i - stack_param_delta) * 4));
  }

  // Set the new stack and frame pointer.
  sub(sp, fp, Operand(stack_param_delta * 4));
  Pop(lr, fp);
}

void LiftoffAssembler::PatchPrepareStackFrame(int offset, int frame_size) {
#ifdef USE_SIMULATOR
  // When using the simulator, deal with Liftoff which allocates the stack
  // before checking it.
  // TODO(arm): Remove this when the stack check mechanism is updated.
  if (frame_size > KB / 2) {
    bailout(kOtherReason,
            "Stack limited to 512 bytes to avoid a bug in StackCheck");
    return;
  }
#endif
  PatchingAssembler patching_assembler(AssemblerOptions{},
                                       buffer_start_ + offset,
                                       liftoff::kPatchInstructionsRequired);
#if V8_OS_WIN
  if (frame_size > kStackPageSize) {
    // Generate OOL code (at the end of the function, where the current
    // assembler is pointing) to do the explicit stack limit check (see
    // https://docs.microsoft.com/en-us/previous-versions/visualstudio/
    // visual-studio-6.0/aa227153(v=vs.60)).
    // At the function start, emit a jump to that OOL code (from {offset} to
    // {pc_offset()}).
    int ool_offset = pc_offset() - offset;
    patching_assembler.b(ool_offset - Instruction::kPcLoadDelta);
    patching_assembler.PadWithNops();

    // Now generate the OOL code.
    AllocateStackSpace(frame_size);
    // Jump back to the start of the function (from {pc_offset()} to {offset +
    // liftoff::kPatchInstructionsRequired * kInstrSize}).
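    // On ARM a branch offset is encoded relative to pc + 8 (the pc reads two
    // instructions ahead), which is what Instruction::kPcLoadDelta corrects
    // for here.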
    int func_start_offset =
        offset + liftoff::kPatchInstructionsRequired * kInstrSize - pc_offset();
    b(func_start_offset - Instruction::kPcLoadDelta);
    return;
  }
#endif
  patching_assembler.sub(sp, sp, Operand(frame_size));
  patching_assembler.PadWithNops();
}

void LiftoffAssembler::FinishCode() { CheckConstPool(true, false); }

void LiftoffAssembler::AbortCompilation() { AbortedCodeGeneration(); }

// static
constexpr int LiftoffAssembler::StaticStackFrameSize() {
  return liftoff::kInstanceOffset;
}

int LiftoffAssembler::SlotSizeForType(ValueType type) {
  switch (type.kind()) {
    case ValueType::kS128:
      return type.element_size_bytes();
    default:
      return kStackSlotSize;
  }
}

bool LiftoffAssembler::NeedsAlignment(ValueType type) {
  return (type.kind() == ValueType::kS128 || type.is_reference_type());
}

void LiftoffAssembler::LoadConstant(LiftoffRegister reg, WasmValue value,
                                    RelocInfo::Mode rmode) {
  switch (value.type().kind()) {
    case ValueType::kI32:
      TurboAssembler::Move(reg.gp(), Operand(value.to_i32(), rmode));
      break;
    case ValueType::kI64: {
      DCHECK(RelocInfo::IsNone(rmode));
      int32_t low_word = value.to_i64();
      int32_t high_word = value.to_i64() >> 32;
      TurboAssembler::Move(reg.low_gp(), Operand(low_word));
      TurboAssembler::Move(reg.high_gp(), Operand(high_word));
      break;
    }
    case ValueType::kF32:
      vmov(liftoff::GetFloatRegister(reg.fp()), value.to_f32_boxed());
      break;
    case ValueType::kF64: {
      Register extra_scratch = GetUnusedRegister(kGpReg, {}).gp();
      vmov(reg.fp(), Double(value.to_f64_boxed().get_bits()), extra_scratch);
      break;
    }
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::LoadFromInstance(Register dst, int offset, int size) {
  DCHECK_LE(0, offset);
  DCHECK_EQ(4, size);
  ldr(dst, liftoff::GetInstanceOperand());
  ldr(dst, MemOperand(dst, offset));
}

void LiftoffAssembler::LoadTaggedPointerFromInstance(Register dst,
                                                     int offset) {
  LoadFromInstance(dst, offset, kTaggedSize);
}

void LiftoffAssembler::SpillInstance(Register instance) {
  str(instance, liftoff::GetInstanceOperand());
}

void LiftoffAssembler::FillInstanceInto(Register dst) {
  ldr(dst, liftoff::GetInstanceOperand());
}

namespace liftoff {
#define __ lasm->

inline void LoadInternal(LiftoffAssembler* lasm, LiftoffRegister dst,
                         Register src_addr, Register offset_reg,
                         int32_t offset_imm, LoadType type,
                         LiftoffRegList pinned,
                         uint32_t* protected_load_pc = nullptr,
                         bool is_load_mem = false) {
  DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair());

  UseScratchRegisterScope temps(lasm);
  if (type.value() == LoadType::kF64Load ||
      type.value() == LoadType::kF32Load ||
      type.value() == LoadType::kS128Load) {
    Register actual_src_addr = liftoff::CalculateActualAddress(
        lasm, &temps, src_addr, offset_reg, offset_imm);
    if (type.value() == LoadType::kF64Load) {
      // Armv6 is not supported so Neon can be used to avoid alignment issues.
      CpuFeatureScope scope(lasm, NEON);
      __ vld1(Neon64, NeonListOperand(dst.fp()),
              NeonMemOperand(actual_src_addr));
    } else if (type.value() == LoadType::kF32Load) {
      // TODO(arm): Use vld1 for f32 when implemented in simulator as used for
      // f64. It supports unaligned access.
      Register scratch =
          (actual_src_addr == src_addr) ? temps.Acquire() : actual_src_addr;
      __ ldr(scratch, MemOperand(actual_src_addr));
      __ vmov(liftoff::GetFloatRegister(dst.fp()), scratch);
    } else {
      // Armv6 is not supported so Neon can be used to avoid alignment issues.
      CpuFeatureScope scope(lasm, NEON);
      __ vld1(Neon8, NeonListOperand(dst.low_fp(), 2),
              NeonMemOperand(actual_src_addr));
    }
  } else {
    MemOperand src_op =
        liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg, offset_imm);
    if (protected_load_pc) *protected_load_pc = __ pc_offset();
    switch (type.value()) {
      case LoadType::kI32Load8U:
        __ ldrb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8U:
        __ ldrb(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load8S:
        __ ldrsb(dst.gp(), src_op);
        break;
      case LoadType::kI64Load8S:
        __ ldrsb(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI32Load16U:
        __ ldrh(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16U:
        __ ldrh(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI32Load16S:
        __ ldrsh(dst.gp(), src_op);
        break;
      case LoadType::kI32Load:
        __ ldr(dst.gp(), src_op);
        break;
      case LoadType::kI64Load16S:
        __ ldrsh(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load32U:
        __ ldr(dst.low_gp(), src_op);
        __ mov(dst.high_gp(), Operand(0));
        break;
      case LoadType::kI64Load32S:
        __ ldr(dst.low_gp(), src_op);
        __ asr(dst.high_gp(), dst.low_gp(), Operand(31));
        break;
      case LoadType::kI64Load:
        __ ldr(dst.low_gp(), src_op);
        // GetMemOp may use a scratch register as the offset register, in which
        // case, calling GetMemOp again will fail due to the assembler having
        // run out of scratch registers.
        if (temps.CanAcquire()) {
          src_op = liftoff::GetMemOp(lasm, &temps, src_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          __ add(src_op.rm(), src_op.rm(), Operand(kSystemPointerSize));
        }
        __ ldr(dst.high_gp(), src_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::LoadTaggedPointer(Register dst, Register src_addr,
                                         Register offset_reg,
                                         int32_t offset_imm,
                                         LiftoffRegList pinned) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  liftoff::LoadInternal(this, LiftoffRegister(dst), src_addr, offset_reg,
                        offset_imm, LoadType::kI32Load, pinned);
}

void LiftoffAssembler::StoreTaggedPointer(Register dst_addr,
                                          int32_t offset_imm,
                                          LiftoffRegister src,
                                          LiftoffRegList pinned) {
  STATIC_ASSERT(kTaggedSize == kInt32Size);
  // Store the value.
  MemOperand dst_op(dst_addr, offset_imm);
  str(src.gp(), dst_op);
  // The write barrier.
  Label write_barrier;
  Label exit;
  CheckPageFlag(dst_addr, MemoryChunk::kPointersFromHereAreInterestingMask, ne,
                &write_barrier);
  b(&exit);
  bind(&write_barrier);
  JumpIfSmi(src.gp(), &exit);
  CheckPageFlag(src.gp(), MemoryChunk::kPointersToHereAreInterestingMask, eq,
                &exit);
  CallRecordWriteStub(dst_addr, Operand(offset_imm), EMIT_REMEMBERED_SET,
                      kSaveFPRegs, wasm::WasmCode::kRecordWrite);
  bind(&exit);
}

void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
                            Register offset_reg, uint32_t offset_imm,
                            LoadType type, LiftoffRegList pinned,
                            uint32_t* protected_load_pc, bool is_load_mem) {
  // If offset_imm cannot be converted to int32 safely, we abort as a separate
  // check should cause this code to never be executed.
  // TODO(7881): Support when >2GB is required.
  if (!is_uint31(offset_imm)) {
    TurboAssembler::Abort(AbortReason::kOffsetOutOfRange);
    return;
  }
  liftoff::LoadInternal(this, dst, src_addr, offset_reg,
                        static_cast<int32_t>(offset_imm), type, pinned,
                        protected_load_pc, is_load_mem);
}

void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
                             uint32_t offset_imm, LiftoffRegister src,
                             StoreType type, LiftoffRegList pinned,
                             uint32_t* protected_store_pc, bool is_store_mem) {
  // If offset_imm cannot be converted to int32 safely, we abort as a separate
  // check should cause this code to never be executed.
  // TODO(7881): Support when >2GB is required.
  if (!is_uint31(offset_imm)) {
    TurboAssembler::Abort(AbortReason::kOffsetOutOfRange);
    return;
  }
  UseScratchRegisterScope temps(this);
  if (type.value() == StoreType::kF64Store) {
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    // Armv6 is not supported so Neon can be used to avoid alignment issues.
    CpuFeatureScope scope(this, NEON);
    vst1(Neon64, NeonListOperand(src.fp()), NeonMemOperand(actual_dst_addr));
  } else if (type.value() == StoreType::kS128Store) {
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    // Armv6 is not supported so Neon can be used to avoid alignment issues.
    CpuFeatureScope scope(this, NEON);
    vst1(Neon8, NeonListOperand(src.low_fp(), 2),
         NeonMemOperand(actual_dst_addr));
  } else if (type.value() == StoreType::kF32Store) {
    // TODO(arm): Use vst1 for f32 when implemented in simulator as used for
    // f64. It supports unaligned access.
    // CalculateActualAddress will only not use a scratch register if the
    // following condition holds, otherwise another register must be
    // retrieved.
    Register scratch = (offset_reg == no_reg && offset_imm == 0)
                           ? temps.Acquire()
                           : GetUnusedRegister(kGpReg, pinned).gp();
    Register actual_dst_addr = liftoff::CalculateActualAddress(
        this, &temps, dst_addr, offset_reg, offset_imm);
    vmov(scratch, liftoff::GetFloatRegister(src.fp()));
    str(scratch, MemOperand(actual_dst_addr));
  } else {
    MemOperand dst_op =
        liftoff::GetMemOp(this, &temps, dst_addr, offset_reg, offset_imm);
    if (protected_store_pc) *protected_store_pc = pc_offset();
    switch (type.value()) {
      case StoreType::kI64Store8:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store8:
        strb(src.gp(), dst_op);
        break;
      case StoreType::kI64Store16:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store16:
        strh(src.gp(), dst_op);
        break;
      case StoreType::kI64Store32:
        src = src.low();
        V8_FALLTHROUGH;
      case StoreType::kI32Store:
        str(src.gp(), dst_op);
        break;
      case StoreType::kI64Store:
        str(src.low_gp(), dst_op);
        // GetMemOp may use a scratch register as the offset register, in which
        // case, calling GetMemOp again will fail due to the assembler having
        // run out of scratch registers.
        if (temps.CanAcquire()) {
          dst_op = liftoff::GetMemOp(this, &temps, dst_addr, offset_reg,
                                     offset_imm + kSystemPointerSize);
        } else {
          add(dst_op.rm(), dst_op.rm(), Operand(kSystemPointerSize));
        }
        str(src.high_gp(), dst_op);
        break;
      default:
        UNREACHABLE();
    }
  }
}

namespace liftoff {
#define __ lasm->

inline void AtomicOp32(
    LiftoffAssembler* lasm, Register dst_addr, Register offset_reg,
    uint32_t offset_imm, LiftoffRegister value, LiftoffRegister result,
    LiftoffRegList pinned,
    void (Assembler::*load)(Register, Register, Condition),
    void (Assembler::*store)(Register, Register, Register, Condition),
    void (*op)(LiftoffAssembler*, Register, Register, Register)) {
  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // Allocate an additional {temp} register to hold the result that should be
  // stored to memory. Note that {temp} and {store_result} are not allowed to
  // be the same register.
  Register temp = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();

  // Make sure that {result} is unique.
  Register result_reg = result.gp();
  if (result_reg == value.gp() || result_reg == dst_addr ||
      result_reg == offset_reg) {
    result_reg = __ GetUnusedRegister(kGpReg, pinned).gp();
  }

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, &temps, dst_addr, offset_reg, offset_imm);

  __ dmb(ISH);
  Label retry;
  __ bind(&retry);
  (lasm->*load)(result_reg, actual_addr, al);
  op(lasm, temp, result_reg, value.gp());
  (lasm->*store)(store_result, temp, actual_addr, al);
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);
  if (result_reg != result.gp()) {
    __ mov(result.gp(), result_reg);
  }
}

inline void Add(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ add(dst, lhs, rhs);
}

inline void Sub(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ sub(dst, lhs, rhs);
}

inline void And(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ and_(dst, lhs, rhs);
}

inline void Or(LiftoffAssembler* lasm, Register dst, Register lhs,
               Register rhs) {
  __ orr(dst, lhs, rhs);
}

inline void Xor(LiftoffAssembler* lasm, Register dst, Register lhs,
                Register rhs) {
  __ eor(dst, lhs, rhs);
}

inline void Exchange(LiftoffAssembler* lasm, Register dst, Register lhs,
                     Register rhs) {
  __ mov(dst, rhs);
}

inline void AtomicBinop32(LiftoffAssembler* lasm, Register dst_addr,
                          Register offset_reg, uint32_t offset_imm,
                          LiftoffRegister value, LiftoffRegister result,
                          StoreType type,
                          void (*op)(LiftoffAssembler*, Register, Register,
                                     Register)) {
  LiftoffRegList pinned =
      LiftoffRegList::ForRegs(dst_addr, offset_reg, value, result);
  switch (type.value()) {
    case StoreType::kI64Store8:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value,
                          result, pinned, &Assembler::ldrexb,
                          &Assembler::strexb, op);
      return;
    case StoreType::kI64Store16:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value,
                          result, pinned, &Assembler::ldrexh,
                          &Assembler::strexh, op);
      return;
    case StoreType::kI64Store32:
      __ LoadConstant(result.high(), WasmValue(0));
      result = result.low();
      value = value.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      liftoff::AtomicOp32(lasm, dst_addr, offset_reg, offset_imm, value,
                          result, pinned, &Assembler::ldrex, &Assembler::strex,
                          op);
      return;
    default:
      UNREACHABLE();
  }
}

inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr,
                       Register offset_reg, uint32_t offset_imm,
                       LiftoffRegister value,
                       base::Optional<LiftoffRegister> result,
                       void (*op)(LiftoffAssembler*, LiftoffRegister,
                                  LiftoffRegister, LiftoffRegister)) {
  // {ldrexd} and {strexd} operate on a 64 bit word held in two registers. The
  // first register needs to have an even index, e.g. r8, the second register
  // needs to be the one with the next higher index, e.g. r9 if the first
  // register is r8. In the following code we use the fixed register pair
  // r8/r9 to make the code here simpler, even though other register pairs
  // would also be possible.
  constexpr Register dst_low = r8;
  constexpr Register dst_high = r9;

  // Make sure {dst_low} and {dst_high} are not occupied by any other value.
  Register value_low = value.low_gp();
  Register value_high = value.high_gp();
  LiftoffRegList pinned = LiftoffRegList::ForRegs(
      dst_addr, offset_reg, value_low, value_high, dst_low, dst_high);
  __ ClearRegister(dst_low, {&dst_addr, &offset_reg, &value_low, &value_high},
                   pinned);
  pinned = pinned | LiftoffRegList::ForRegs(dst_addr, offset_reg, value_low,
                                            value_high);
  __ ClearRegister(dst_high, {&dst_addr, &offset_reg, &value_low, &value_high},
                   pinned);
  pinned = pinned | LiftoffRegList::ForRegs(dst_addr, offset_reg, value_low,
                                            value_high);

  // Make sure that {result}, if it exists, also does not overlap with
  // {dst_low} and {dst_high}. We don't have to transfer the value stored in
  // {result}.
  Register result_low = no_reg;
  Register result_high = no_reg;
  if (result.has_value()) {
    result_low = result.value().low_gp();
    if (pinned.has(result_low)) {
      result_low = __ GetUnusedRegister(kGpReg, pinned).gp();
    }
    pinned.set(result_low);

    result_high = result.value().high_gp();
    if (pinned.has(result_high)) {
      result_high = __ GetUnusedRegister(kGpReg, pinned).gp();
    }
    pinned.set(result_high);
  }

  Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();

  UseScratchRegisterScope temps(lasm);
  Register actual_addr = liftoff::CalculateActualAddress(
      lasm, &temps, dst_addr, offset_reg, offset_imm);

  __ dmb(ISH);
  Label retry;
  __ bind(&retry);
  // {ldrexd} is needed here so that the {strexd} instruction below can
  // succeed. We don't need the value we are reading. We use {dst_low} and
  // {dst_high} as the destination registers because {ldrexd} has the same
  // restrictions on registers as {strexd}, see the comment above.
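  // This is the usual ARM load-linked/store-conditional retry loop; in C-like
  // pseudocode (names illustrative only):
  //   do {
  //     old = ldrexd(addr);               // exclusive 64 bit load
  //     new = op(old, value);
  //   } while (strexd(new, addr) != 0);   // 0 means the store succeeded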
  __ ldrexd(dst_low, dst_high, actual_addr);
  if (result.has_value()) {
    __ mov(result_low, dst_low);
    __ mov(result_high, dst_high);
  }
  op(lasm, LiftoffRegister::ForPair(dst_low, dst_high),
     LiftoffRegister::ForPair(dst_low, dst_high),
     LiftoffRegister::ForPair(value_low, value_high));
  __ strexd(store_result, dst_low, dst_high, actual_addr);
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);

  if (result.has_value()) {
    if (result_low != result.value().low_gp()) {
      __ mov(result.value().low_gp(), result_low);
    }
    if (result_high != result.value().high_gp()) {
      __ mov(result.value().high_gp(), result_high);
    }
  }
}

inline void I64Store(LiftoffAssembler* lasm, LiftoffRegister dst,
                     LiftoffRegister, LiftoffRegister src) {
  __ mov(dst.low_gp(), src.low_gp());
  __ mov(dst.high_gp(), src.high_gp());
}

#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicLoad(LiftoffRegister dst, Register src_addr,
                                  Register offset_reg, uint32_t offset_imm,
                                  LoadType type, LiftoffRegList pinned) {
  if (type.value() != LoadType::kI64Load) {
    Load(dst, src_addr, offset_reg, offset_imm, type, pinned, nullptr, true);
    dmb(ISH);
    return;
  }
  // ldrexd loads a 64 bit word into two registers. The first register needs
  // to have an even index, e.g. r8, the second register needs to be the one
  // with the next higher index, e.g. r9 if the first register is r8. In the
  // following code we use the fixed register pair r8/r9 to make the code here
  // simpler, even though other register pairs would also be possible.
  constexpr Register dst_low = r8;
  constexpr Register dst_high = r9;
  if (cache_state()->is_used(LiftoffRegister(dst_low))) {
    SpillRegister(LiftoffRegister(dst_low));
  }
  if (cache_state()->is_used(LiftoffRegister(dst_high))) {
    SpillRegister(LiftoffRegister(dst_high));
  }
  {
    UseScratchRegisterScope temps(this);
    Register actual_addr = liftoff::CalculateActualAddress(
        this, &temps, src_addr, offset_reg, offset_imm);
    ldrexd(dst_low, dst_high, actual_addr);
    dmb(ISH);
  }

  ParallelRegisterMove(
      {{dst, LiftoffRegister::ForPair(dst_low, dst_high), kWasmI64}});
}

void LiftoffAssembler::AtomicStore(Register dst_addr, Register offset_reg,
                                   uint32_t offset_imm, LiftoffRegister src,
                                   StoreType type, LiftoffRegList pinned) {
  if (type.value() == StoreType::kI64Store) {
    // A plain 64 bit store is not guaranteed to be atomic here, so the i64
    // store goes through the ldrexd/strexd loop; {I64Store} as the op simply
    // overwrites the loaded value with {src}.
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, src, {},
                        liftoff::I64Store);
    return;
  }

  dmb(ISH);
  Store(dst_addr, offset_reg, offset_imm, src, type, pinned, nullptr, true);
  dmb(ISH);
  return;
}

void LiftoffAssembler::AtomicAdd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result},
                        liftoff::I64Binop<&Assembler::add, &Assembler::adc>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Add);
}

void LiftoffAssembler::AtomicSub(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result},
                        liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Sub);
}

void LiftoffAssembler::AtomicAnd(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
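    // A 64 bit AND is applied to each half independently; there is no carry
    // between the halves, so {and_} serves as both the low-word op and the
    // "with carry" high-word op.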
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result},
                        liftoff::I64Binop<&Assembler::and_, &Assembler::and_>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::And);
}

void LiftoffAssembler::AtomicOr(Register dst_addr, Register offset_reg,
                                uint32_t offset_imm, LiftoffRegister value,
                                LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result},
                        liftoff::I64Binop<&Assembler::orr, &Assembler::orr>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Or);
}

void LiftoffAssembler::AtomicXor(Register dst_addr, Register offset_reg,
                                 uint32_t offset_imm, LiftoffRegister value,
                                 LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result},
                        liftoff::I64Binop<&Assembler::eor, &Assembler::eor>);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Xor);
}

void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
                                      uint32_t offset_imm,
                                      LiftoffRegister value,
                                      LiftoffRegister result, StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicOp64(this, dst_addr, offset_reg, offset_imm, value,
                        {result}, liftoff::I64Store);
    return;
  }
  liftoff::AtomicBinop32(this, dst_addr, offset_reg, offset_imm, value, result,
                         type, &liftoff::Exchange);
}

namespace liftoff {
#define __ lasm->

inline void AtomicI64CompareExchange(LiftoffAssembler* lasm,
                                     Register dst_addr_reg, Register offset_reg,
                                     uint32_t offset_imm,
                                     LiftoffRegister expected,
                                     LiftoffRegister new_value,
                                     LiftoffRegister result) {
  // To implement I64AtomicCompareExchange, we nearly need all registers, with
  // some registers having special constraints, e.g. for {new_value} and
  // {result} the low-word register has to have an even register code, and the
  // high-word has to be in the next higher register. To avoid complicated
  // register allocation code here, we just assign fixed registers to all
  // values here, and then move all values into the correct register.
  Register dst_addr = r0;
  Register offset = r1;
  Register result_low = r4;
  Register result_high = r5;
  Register new_value_low = r2;
  Register new_value_high = r3;
  Register store_result = r6;
  Register expected_low = r8;
  Register expected_high = r9;

  // We spill all registers, so that we can re-assign them afterwards.
  __ SpillRegisters(dst_addr, offset, result_low, result_high, new_value_low,
                    new_value_high, store_result, expected_low, expected_high);

  __ ParallelRegisterMove(
      {{LiftoffRegister::ForPair(new_value_low, new_value_high), new_value,
        kWasmI64},
       {LiftoffRegister::ForPair(expected_low, expected_high), expected,
        kWasmI64},
       {dst_addr, dst_addr_reg, kWasmI32},
       {offset, offset_reg != no_reg ? offset_reg : offset, kWasmI32}});

  {
    UseScratchRegisterScope temps(lasm);
    Register temp = liftoff::CalculateActualAddress(
        lasm, &temps, dst_addr, offset_reg == no_reg ? no_reg : offset,
        offset_imm, dst_addr);
    // Make sure the actual address is stored in the right register.
    DCHECK_EQ(dst_addr, temp);
    USE(temp);
  }

  Label retry;
  Label done;
  __ dmb(ISH);
  __ bind(&retry);
  __ ldrexd(result_low, result_high, dst_addr);
  __ cmp(result_low, expected_low);
  __ b(ne, &done);
  __ cmp(result_high, expected_high);
  __ b(ne, &done);
  __ strexd(store_result, new_value_low, new_value_high, dst_addr);
  __ cmp(store_result, Operand(0));
  __ b(ne, &retry);
  __ dmb(ISH);
  __ bind(&done);

  __ ParallelRegisterMove(
      {{result, LiftoffRegister::ForPair(result_low, result_high), kWasmI64}});
}
#undef __
}  // namespace liftoff

void LiftoffAssembler::AtomicCompareExchange(
    Register dst_addr, Register offset_reg, uint32_t offset_imm,
    LiftoffRegister expected, LiftoffRegister new_value, LiftoffRegister result,
    StoreType type) {
  if (type.value() == StoreType::kI64Store) {
    liftoff::AtomicI64CompareExchange(this, dst_addr, offset_reg, offset_imm,
                                      expected, new_value, result);
    return;
  }

  // The other versions of CompareExchange can share code, but need special
  // load and store instructions.
  void (Assembler::*load)(Register, Register, Condition) = nullptr;
  void (Assembler::*store)(Register, Register, Register, Condition) = nullptr;

  LiftoffRegList pinned = LiftoffRegList::ForRegs(dst_addr, offset_reg);
  // We need to remember the high word of {result}, so we can set it to zero in
  // the end if necessary.
  Register result_high = no_reg;
  switch (type.value()) {
    case StoreType::kI64Store8:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store8:
      load = &Assembler::ldrexb;
      store = &Assembler::strexb;
      // We have to clear the high bits of {expected}, as we can only do a
      // 32-bit comparison. If the {expected} register is used, we spill it
      // first.
      if (cache_state()->is_used(expected)) {
        SpillRegister(expected);
      }
      uxtb(expected.gp(), expected.gp());
      break;
    case StoreType::kI64Store16:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store16:
      load = &Assembler::ldrexh;
      store = &Assembler::strexh;
      // We have to clear the high bits of {expected}, as we can only do a
      // 32-bit comparison. If the {expected} register is used, we spill it
      // first.
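      // ({ldrexh} zero-extends the loaded half word, so {expected} must be
      // zero-extended as well for the 32-bit {cmp} below to be meaningful;
      // the same reasoning applies to {uxtb} in the byte case above.)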
      if (cache_state()->is_used(expected)) {
        SpillRegister(expected);
      }
      uxth(expected.gp(), expected.gp());
      break;
    case StoreType::kI64Store32:
      result_high = result.high_gp();
      result = result.low();
      new_value = new_value.low();
      expected = expected.low();
      V8_FALLTHROUGH;
    case StoreType::kI32Store:
      load = &Assembler::ldrex;
      store = &Assembler::strex;
      break;
    default:
      UNREACHABLE();
  }
  pinned.set(new_value);
  pinned.set(expected);

  Register result_reg = result.gp();
  if (pinned.has(result)) {
    result_reg = GetUnusedRegister(kGpReg, pinned).gp();
  }
  pinned.set(LiftoffRegister(result_reg));
  Register store_result = GetUnusedRegister(kGpReg, pinned).gp();

  UseScratchRegisterScope temps(this);
  Register actual_addr = liftoff::CalculateActualAddress(
      this, &temps, dst_addr, offset_reg, offset_imm);

  Label retry;
  Label done;
  dmb(ISH);
  bind(&retry);
  (this->*load)(result_reg, actual_addr, al);
  cmp(result_reg, expected.gp());
  b(ne, &done);
  (this->*store)(store_result, new_value.gp(), actual_addr, al);
  cmp(store_result, Operand(0));
  b(ne, &retry);
  dmb(ISH);
  bind(&done);

  if (result.gp() != result_reg) {
    mov(result.gp(), result_reg);
  }
  if (result_high != no_reg) {
    LoadConstant(LiftoffRegister(result_high), WasmValue(0));
  }
}

void LiftoffAssembler::AtomicFence() { dmb(ISH); }

void LiftoffAssembler::LoadCallerFrameSlot(LiftoffRegister dst,
                                           uint32_t caller_slot_idx,
                                           ValueType type) {
  MemOperand src(fp, (caller_slot_idx + 1) * kSystemPointerSize);
  liftoff::Load(this, dst, src, type);
}

void LiftoffAssembler::StoreCallerFrameSlot(LiftoffRegister src,
                                            uint32_t caller_slot_idx,
                                            ValueType type) {
  MemOperand dst(fp, (caller_slot_idx + 1) * kSystemPointerSize);
  liftoff::Store(this, src, dst, type);
}

void LiftoffAssembler::LoadReturnStackSlot(LiftoffRegister dst, int offset,
                                           ValueType type) {
  MemOperand src(sp, offset);
  liftoff::Load(this, dst, src, type);
}

void LiftoffAssembler::MoveStackValue(uint32_t dst_offset, uint32_t src_offset,
                                      ValueType type) {
  DCHECK_NE(dst_offset, src_offset);
  LiftoffRegister reg = GetUnusedRegister(reg_class_for(type), {});
  Fill(reg, src_offset, type);
  Spill(dst_offset, reg, type);
}

void LiftoffAssembler::Move(Register dst, Register src, ValueType type) {
  DCHECK_NE(dst, src);
  DCHECK(type == kWasmI32 || type.is_reference_type());
  TurboAssembler::Move(dst, src);
}

void LiftoffAssembler::Move(DoubleRegister dst, DoubleRegister src,
                            ValueType type) {
  DCHECK_NE(dst, src);
  if (type == kWasmF32) {
    vmov(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
  } else if (type == kWasmF64) {
    vmov(dst, src);
  } else {
    DCHECK_EQ(kWasmS128, type);
    vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src));
  }
}

void LiftoffAssembler::Spill(int offset, LiftoffRegister reg, ValueType type) {
  // The {str} instruction needs a temp register when the immediate in the
  // provided MemOperand does not fit into 12 bits. This happens for large
  // stack frames. This DCHECK checks that the temp register is available when
  // needed.
  DCHECK(UseScratchRegisterScope{this}.CanAcquire());
  DCHECK_LT(0, offset);
  RecordUsedSpillOffset(offset);
  MemOperand dst(fp, -offset);
  liftoff::Store(this, reg, dst, type);
}

void LiftoffAssembler::Spill(int offset, WasmValue value) {
  RecordUsedSpillOffset(offset);
  MemOperand dst = liftoff::GetStackSlot(offset);
  UseScratchRegisterScope temps(this);
  Register src = no_reg;
  // The scratch register will be required by str if multiple instructions
  // are required to encode the offset, and so we cannot use it in that case.
  if (!ImmediateFitsAddrMode2Instruction(dst.offset())) {
    src = GetUnusedRegister(kGpReg, {}).gp();
  } else {
    src = temps.Acquire();
  }
  switch (value.type().kind()) {
    case ValueType::kI32:
      mov(src, Operand(value.to_i32()));
      str(src, dst);
      break;
    case ValueType::kI64: {
      int32_t low_word = value.to_i64();
      mov(src, Operand(low_word));
      str(src, liftoff::GetHalfStackSlot(offset, kLowWord));
      int32_t high_word = value.to_i64() >> 32;
      mov(src, Operand(high_word));
      str(src, liftoff::GetHalfStackSlot(offset, kHighWord));
      break;
    }
    default:
      // We do not track f32 and f64 constants, hence they are unreachable.
      UNREACHABLE();
  }
}

void LiftoffAssembler::Fill(LiftoffRegister reg, int offset, ValueType type) {
  liftoff::Load(this, reg, liftoff::GetStackSlot(offset), type);
}

void LiftoffAssembler::FillI64Half(Register reg, int offset, RegPairHalf half) {
  ldr(reg, liftoff::GetHalfStackSlot(offset, half));
}

void LiftoffAssembler::FillStackSlotsWithZero(int start, int size) {
  DCHECK_LT(0, size);
  DCHECK_EQ(0, size % 4);
  RecordUsedSpillOffset(start + size);

  // We need a zero reg. Always use r0 for that, and push it before to restore
  // its value afterwards.
  push(r0);
  mov(r0, Operand(0));

  if (size <= 36) {
    // Special straight-line code for up to 9 words. Generates one
    // instruction per word.
    for (int offset = 4; offset <= size; offset += 4) {
      str(r0, liftoff::GetHalfStackSlot(start + offset, kLowWord));
    }
  } else {
    // General case for bigger counts (9 instructions).
    // Use r1 for start address (inclusive), r2 for end address (exclusive).
    push(r1);
    push(r2);
    sub(r1, fp, Operand(start + size));
    sub(r2, fp, Operand(start));

    Label loop;
    bind(&loop);
    str(r0, MemOperand(r1, /* offset */ kSystemPointerSize, PostIndex));
    cmp(r1, r2);
    b(&loop, ne);

    pop(r2);
    pop(r1);
  }

  pop(r0);
}

#define I32_BINOP(name, instruction)                             \
  void LiftoffAssembler::emit_##name(Register dst, Register lhs, \
                                     Register rhs) {             \
    instruction(dst, lhs, rhs);                                  \
  }
#define I32_BINOP_I(name, instruction)                              \
  I32_BINOP(name, instruction)                                      \
  void LiftoffAssembler::emit_##name##i(Register dst, Register lhs, \
                                        int32_t imm) {              \
    instruction(dst, lhs, Operand(imm));                            \
  }
#define I32_SHIFTOP(name, instruction)                              \
  void LiftoffAssembler::emit_##name(Register dst, Register src,    \
                                     Register amount) {             \
    UseScratchRegisterScope temps(this);                            \
    Register scratch = temps.Acquire();                             \
    and_(scratch, amount, Operand(0x1f));                           \
    instruction(dst, src, Operand(scratch));                        \
  }                                                                 \
  void LiftoffAssembler::emit_##name##i(Register dst, Register src, \
                                        int32_t amount) {           \
    if (V8_LIKELY((amount & 31) != 0)) {                            \
      instruction(dst, src, Operand(amount & 31));                  \
    } else if (dst != src) {                                        \
      mov(dst, src);                                                \
    }                                                               \
  }
#define FP32_UNOP(name, instruction)                                         \
  void LiftoffAssembler::emit_##name(DoubleRegister dst,                     \
                                     DoubleRegister src) {                   \
    instruction(liftoff::GetFloatRegister(dst),                              \
                liftoff::GetFloatRegister(src));                             \
  }
#define FP32_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(liftoff::GetFloatRegister(dst),                              \
                liftoff::GetFloatRegister(lhs),                              \
                liftoff::GetFloatRegister(rhs));                             \
  }
#define FP64_UNOP(name, instruction)                                         \
  void LiftoffAssembler::emit_##name(DoubleRegister dst,                     \
                                     DoubleRegister src) {                   \
    instruction(dst, src);                                                   \
  }
#define FP64_BINOP(name, instruction)                                        \
  void LiftoffAssembler::emit_##name(DoubleRegister dst, DoubleRegister lhs, \
                                     DoubleRegister rhs) {                   \
    instruction(dst, lhs, rhs);                                              \
  }

I32_BINOP_I(i32_add, add)
I32_BINOP(i32_sub, sub)
I32_BINOP(i32_mul, mul)
I32_BINOP_I(i32_and, and_)
I32_BINOP_I(i32_or, orr)
I32_BINOP_I(i32_xor, eor)
I32_SHIFTOP(i32_shl, lsl)
I32_SHIFTOP(i32_sar, asr)
I32_SHIFTOP(i32_shr, lsr)
FP32_BINOP(f32_add, vadd)
FP32_BINOP(f32_sub, vsub)
FP32_BINOP(f32_mul, vmul)
FP32_BINOP(f32_div, vdiv)
FP32_UNOP(f32_abs, vabs)
FP32_UNOP(f32_neg, vneg)
FP32_UNOP(f32_sqrt, vsqrt)
FP64_BINOP(f64_add, vadd)
FP64_BINOP(f64_sub, vsub)
FP64_BINOP(f64_mul, vmul)
FP64_BINOP(f64_div, vdiv)
FP64_UNOP(f64_abs, vabs)
FP64_UNOP(f64_neg, vneg)
FP64_UNOP(f64_sqrt, vsqrt)

#undef I32_BINOP
#undef I32_SHIFTOP
#undef FP32_UNOP
#undef FP32_BINOP
#undef FP64_UNOP
#undef FP64_BINOP

void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
  clz(dst, src);
}

void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
  rbit(dst, src);
  clz(dst, dst);
}

namespace liftoff {
inline void GeneratePopCnt(Assembler* assm, Register dst, Register src,
                           Register scratch1, Register scratch2) {
  DCHECK(!AreAliased(dst, scratch1, scratch2));
  if (src == scratch1) std::swap(scratch1, scratch2);
  // x = x - ((x & (0x55555555 << 1)) >> 1)
  assm->and_(scratch1, src, Operand(0xaaaaaaaa));
  assm->sub(dst, src, Operand(scratch1, LSR, 1));
  // x = (x & 0x33333333) + ((x & (0x33333333 << 2)) >> 2)
  assm->mov(scratch1, Operand(0x33333333));
  assm->and_(scratch2, dst, Operand(scratch1, LSL, 2));
  assm->and_(scratch1, dst, scratch1);
  assm->add(dst, scratch1, Operand(scratch2, LSR, 2));
  // x = (x + (x >> 4)) & 0x0F0F0F0F
  assm->add(dst, dst, Operand(dst, LSR, 4));
  assm->and_(dst, dst, Operand(0x0f0f0f0f));
  // x = x + (x >> 8)
  assm->add(dst, dst, Operand(dst, LSR, 8));
  // x = x + (x >> 16)
  assm->add(dst, dst, Operand(dst, LSR, 16));
  // x = x & 0x3F
  assm->and_(dst, dst, Operand(0x3f));
}
}  // namespace liftoff

bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
  LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
  Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
  liftoff::GeneratePopCnt(this, dst, src, scratch1, scratch2);
  return true;
}

void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  if (!CpuFeatures::IsSupported(SUDIV)) {
    bailout(kMissingCPUFeature, "i32_divs");
    return;
  }
  CpuFeatureScope scope(this, SUDIV);
  // Issue division early so we can perform the trapping checks whilst it
  // completes.
  bool speculative_sdiv = dst != lhs && dst != rhs;
  if (speculative_sdiv) {
    sdiv(dst, lhs, rhs);
  }
  Label noTrap;
  // Check for division by zero.
  cmp(rhs, Operand(0));
  b(trap_div_by_zero, eq);
  // Check for kMinInt / -1. This is unrepresentable.
  cmp(rhs, Operand(-1));
  b(&noTrap, ne);
  cmp(lhs, Operand(kMinInt));
  b(trap_div_unrepresentable, eq);
  bind(&noTrap);
  if (!speculative_sdiv) {
    sdiv(dst, lhs, rhs);
  }
}

void LiftoffAssembler::emit_i32_divu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  if (!CpuFeatures::IsSupported(SUDIV)) {
    bailout(kMissingCPUFeature, "i32_divu");
    return;
  }
  CpuFeatureScope scope(this, SUDIV);
  // Check for division by zero.
  cmp(rhs, Operand(0));
  b(trap_div_by_zero, eq);
  udiv(dst, lhs, rhs);
}

void LiftoffAssembler::emit_i32_rems(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  if (!CpuFeatures::IsSupported(SUDIV)) {
    // When this case is handled, a check for ARMv7 is required to use mls.
    // Mls support is implied with SUDIV support.
    bailout(kMissingCPUFeature, "i32_rems");
    return;
  }
  CpuFeatureScope scope(this, SUDIV);
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  sdiv(scratch, lhs, rhs);
  // Check for division by zero.
  cmp(rhs, Operand(0));
  b(trap_div_by_zero, eq);
  // Compute remainder.
  mls(dst, scratch, rhs, lhs);
}

void LiftoffAssembler::emit_i32_remu(Register dst, Register lhs, Register rhs,
                                     Label* trap_div_by_zero) {
  if (!CpuFeatures::IsSupported(SUDIV)) {
    // When this case is handled, a check for ARMv7 is required to use mls.
    // Mls support is implied with SUDIV support.
    bailout(kMissingCPUFeature, "i32_remu");
    return;
  }
  CpuFeatureScope scope(this, SUDIV);
  // No need to check kMinInt / -1 because the result is kMinInt and then
  // kMinInt * -1 -> kMinInt. In this case, the Msub result is therefore 0.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  udiv(scratch, lhs, rhs);
  // Check for division by zero.
  cmp(rhs, Operand(0));
  b(trap_div_by_zero, eq);
  // Compute remainder.
  mls(dst, scratch, rhs, lhs);
}

void LiftoffAssembler::emit_i64_add(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::I64Binop<&Assembler::add, &Assembler::adc>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64_addi(LiftoffRegister dst, LiftoffRegister lhs,
                                     int32_t imm) {
  liftoff::I64BinopI<&Assembler::add, &Assembler::adc>(this, dst, lhs, imm);
}

void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  liftoff::I64Binop<&Assembler::sub, &Assembler::sbc>(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                    LiftoffRegister rhs) {
  // Idea:
  //        [           lhs_hi  |   lhs_lo  ] * [  rhs_hi  |   rhs_lo  ]
  //      = [  lhs_hi * rhs_lo  |           ]  (32 bit mul, shift 32)
  //      + [  lhs_lo * rhs_hi  |           ]  (32 bit mul, shift 32)
  //      + [            lhs_lo * rhs_lo    ]  (32x32->64 mul, shift 0)
  UseScratchRegisterScope temps(this);
  Register scratch = temps.Acquire();
  // scratch = lhs_hi * rhs_lo
  mul(scratch, lhs.high_gp(), rhs.low_gp());
  // scratch += lhs_lo * rhs_hi
  mla(scratch, lhs.low_gp(), rhs.high_gp(), scratch);
  // TODO(arm): use umlal once implemented correctly in the simulator.
  // [dst_hi|dst_lo] = lhs_lo * rhs_lo
  umull(dst.low_gp(), dst.high_gp(), lhs.low_gp(), rhs.low_gp());
  // dst_hi += scratch
  add(dst.high_gp(), dst.high_gp(), scratch);
}

bool LiftoffAssembler::emit_i64_divs(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero,
                                     Label* trap_div_unrepresentable) {
  return false;
}

bool LiftoffAssembler::emit_i64_divu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  return false;
}

bool LiftoffAssembler::emit_i64_rems(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  return false;
}

bool LiftoffAssembler::emit_i64_remu(LiftoffRegister dst, LiftoffRegister lhs,
                                     LiftoffRegister rhs,
                                     Label* trap_div_by_zero) {
  return false;
}

void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount);
}

void LiftoffAssembler::emit_i64_shli(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  UseScratchRegisterScope temps(this);
  // {src.low_gp()} will still be needed after writing {dst.high_gp()}.
  Register src_low =
      liftoff::EnsureNoAlias(this, src.low_gp(), dst.high_gp(), &temps);
  LslPair(dst.low_gp(), dst.high_gp(), src_low, src.high_gp(), amount & 63);
}

void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount);
}

void LiftoffAssembler::emit_i64_sari(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  UseScratchRegisterScope temps(this);
  // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
  Register src_high =
      liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
  AsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
}

void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
                                    Register amount) {
  liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount);
}

void LiftoffAssembler::emit_i64_shri(LiftoffRegister dst, LiftoffRegister src,
                                     int32_t amount) {
  UseScratchRegisterScope temps(this);
  // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
  Register src_high =
      liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
  LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
}

void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
  // return high == 0 ? 32 + CLZ32(low) : CLZ32(high);
  Label done;
  Label high_is_zero;
  cmp(src.high_gp(), Operand(0));
  b(&high_is_zero, eq);

  clz(dst.low_gp(), src.high_gp());
  jmp(&done);

  bind(&high_is_zero);
  clz(dst.low_gp(), src.low_gp());
  add(dst.low_gp(), dst.low_gp(), Operand(32));

  bind(&done);
  mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
}

void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
  // return low == 0 ? 32 + CTZ32(high) : CTZ32(low);
  // CTZ32(x) = CLZ(RBIT(x))
  Label done;
  Label low_is_zero;
  cmp(src.low_gp(), Operand(0));
  b(&low_is_zero, eq);

  rbit(dst.low_gp(), src.low_gp());
  clz(dst.low_gp(), dst.low_gp());
  jmp(&done);

  bind(&low_is_zero);
  rbit(dst.low_gp(), src.high_gp());
  clz(dst.low_gp(), dst.low_gp());
  add(dst.low_gp(), dst.low_gp(), Operand(32));

  bind(&done);
  mov(dst.high_gp(), Operand(0));  // High word of result is always 0.
}

bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
                                       LiftoffRegister src) {
  // Produce partial popcnts in the two dst registers, making sure not to
  // overwrite the second src register before using it.
  Register src1 = src.high_gp() == dst.low_gp() ? src.high_gp() : src.low_gp();
  Register src2 = src.high_gp() == dst.low_gp() ? src.low_gp() : src.high_gp();
  LiftoffRegList pinned = LiftoffRegList::ForRegs(dst, src2);
  Register scratch1 = pinned.set(GetUnusedRegister(kGpReg, pinned)).gp();
  Register scratch2 = GetUnusedRegister(kGpReg, pinned).gp();
  liftoff::GeneratePopCnt(this, dst.low_gp(), src1, scratch1, scratch2);
  liftoff::GeneratePopCnt(this, dst.high_gp(), src2, scratch1, scratch2);
  // Now add the two into the lower dst reg and clear the higher dst reg.
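  // popcnt64(x) == popcnt32(x_lo) + popcnt32(x_hi) <= 64, so the sum always
  // fits into the low word.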
  add(dst.low_gp(), dst.low_gp(), dst.high_gp());
  mov(dst.high_gp(), Operand(0));
  return true;
}

bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintp(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f32_floor(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintm(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f32_trunc(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintz(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f32_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintn(liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(src));
    return true;
  }
  return false;
}

void LiftoffAssembler::emit_f32_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax(
      this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
      liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMin);
}

void LiftoffAssembler::emit_f32_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax(
      this, liftoff::GetFloatRegister(dst), liftoff::GetFloatRegister(lhs),
      liftoff::GetFloatRegister(rhs), liftoff::MinOrMax::kMax);
}

bool LiftoffAssembler::emit_f64_ceil(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintp(dst, src);
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f64_floor(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintm(dst, src);
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f64_trunc(DoubleRegister dst, DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintz(dst, src);
    return true;
  }
  return false;
}

bool LiftoffAssembler::emit_f64_nearest_int(DoubleRegister dst,
                                            DoubleRegister src) {
  if (CpuFeatures::IsSupported(ARMv8)) {
    CpuFeatureScope scope(this, ARMv8);
    vrintn(dst, src);
    return true;
  }
  return false;
}

void LiftoffAssembler::emit_f64_min(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMin);
}

void LiftoffAssembler::emit_f64_max(DoubleRegister dst, DoubleRegister lhs,
                                    DoubleRegister rhs) {
  liftoff::EmitFloatMinOrMax(this, dst, lhs, rhs, liftoff::MinOrMax::kMax);
}

void LiftoffAssembler::emit_u32_to_intptr(Register dst, Register src) {
  // This is a nop on arm.
}

void LiftoffAssembler::emit_f32_copysign(DoubleRegister dst, DoubleRegister lhs,
                                         DoubleRegister rhs) {
  constexpr uint32_t kF32SignBit = uint32_t{1} << 31;
  UseScratchRegisterScope temps(this);
  Register scratch = GetUnusedRegister(kGpReg, {}).gp();
  Register scratch2 = temps.Acquire();
  VmovLow(scratch, lhs);
  // Clear sign bit in {scratch}.
  bic(scratch, scratch, Operand(kF32SignBit));
  VmovLow(scratch2, rhs);
  // Isolate sign bit in {scratch2}.
  and_(scratch2, scratch2, Operand(kF32SignBit));
  // Combine {scratch2} into {scratch}.
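  // I.e. bits(result) = (bits(lhs) & ~kF32SignBit) | (bits(rhs) & kF32SignBit).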
orr(scratch, scratch, scratch2); VmovLow(dst, scratch); } void LiftoffAssembler::emit_f64_copysign(DoubleRegister dst, DoubleRegister lhs, DoubleRegister rhs) { constexpr uint32_t kF64SignBitHighWord = uint32_t{1} << 31; // On arm, we cannot hold the whole f64 value in a gp register, so we just // operate on the upper half (UH). UseScratchRegisterScope temps(this); Register scratch = GetUnusedRegister(kGpReg, {}).gp(); Register scratch2 = temps.Acquire(); VmovHigh(scratch, lhs); // Clear sign bit in {scratch}. bic(scratch, scratch, Operand(kF64SignBitHighWord)); VmovHigh(scratch2, rhs); // Isolate sign bit in {scratch2}. and_(scratch2, scratch2, Operand(kF64SignBitHighWord)); // Combine {scratch2} into {scratch}. orr(scratch, scratch, scratch2); vmov(dst, lhs); VmovHigh(dst, scratch); } bool LiftoffAssembler::emit_type_conversion(WasmOpcode opcode, LiftoffRegister dst, LiftoffRegister src, Label* trap) { switch (opcode) { case kExprI32ConvertI64: TurboAssembler::Move(dst.gp(), src.low_gp()); return true; case kExprI32SConvertF32: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_s32_f32( scratch_f, liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. vmov(dst.gp(), scratch_f); // Check underflow and NaN. vmov(scratch_f, Float32(static_cast(INT32_MIN))); VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); b(trap, lt); // Check overflow. cmp(dst.gp(), Operand(-1)); b(trap, vs); return true; } case kExprI32UConvertF32: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_u32_f32( scratch_f, liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. vmov(dst.gp(), scratch_f); // Check underflow and NaN. vmov(scratch_f, Float32(-1.0f)); VFPCompareAndSetFlags(liftoff::GetFloatRegister(src.fp()), scratch_f); b(trap, le); // Check overflow. cmp(dst.gp(), Operand(-1)); b(trap, eq); return true; } case kExprI32SConvertF64: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. vmov(dst.gp(), scratch_f); // Check underflow and NaN. DwVfpRegister scratch_d = temps.AcquireD(); vmov(scratch_d, Double(static_cast(INT32_MIN - 1.0))); VFPCompareAndSetFlags(src.fp(), scratch_d); b(trap, le); // Check overflow. vmov(scratch_d, Double(static_cast(INT32_MAX + 1.0))); VFPCompareAndSetFlags(src.fp(), scratch_d); b(trap, ge); return true; } case kExprI32UConvertF64: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_u32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. vmov(dst.gp(), scratch_f); // Check underflow and NaN. DwVfpRegister scratch_d = temps.AcquireD(); vmov(scratch_d, Double(static_cast(-1.0))); VFPCompareAndSetFlags(src.fp(), scratch_d); b(trap, le); // Check overflow. vmov(scratch_d, Double(static_cast(UINT32_MAX + 1.0))); VFPCompareAndSetFlags(src.fp(), scratch_d); b(trap, ge); return true; } case kExprI32SConvertSatF32: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_s32_f32( scratch_f, liftoff::GetFloatRegister(src.fp())); // f32 -> i32 round to zero. vmov(dst.gp(), scratch_f); return true; } case kExprI32UConvertSatF32: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_u32_f32( scratch_f, liftoff::GetFloatRegister(src.fp())); // f32 -> u32 round to zero. 
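      // No explicit range checks are needed for the saturating conversions:
      // ARM's vcvt already clamps out-of-range inputs to the target integer
      // range and converts NaN to 0, which matches the *_sat semantics.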
vmov(dst.gp(), scratch_f); return true; } case kExprI32SConvertSatF64: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_s32_f64(scratch_f, src.fp()); // f64 -> i32 round to zero. vmov(dst.gp(), scratch_f); return true; } case kExprI32UConvertSatF64: { UseScratchRegisterScope temps(this); SwVfpRegister scratch_f = temps.AcquireS(); vcvt_u32_f64(scratch_f, src.fp()); // f64 -> u32 round to zero. vmov(dst.gp(), scratch_f); return true; } case kExprI32ReinterpretF32: vmov(dst.gp(), liftoff::GetFloatRegister(src.fp())); return true; case kExprI64SConvertI32: if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); mov(dst.high_gp(), Operand(src.gp(), ASR, 31)); return true; case kExprI64UConvertI32: if (dst.low_gp() != src.gp()) mov(dst.low_gp(), src.gp()); mov(dst.high_gp(), Operand(0)); return true; case kExprI64ReinterpretF64: vmov(dst.low_gp(), dst.high_gp(), src.fp()); return true; case kExprF32SConvertI32: { SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); vmov(dst_float, src.gp()); vcvt_f32_s32(dst_float, dst_float); return true; } case kExprF32UConvertI32: { SwVfpRegister dst_float = liftoff::GetFloatRegister(dst.fp()); vmov(dst_float, src.gp()); vcvt_f32_u32(dst_float, dst_float); return true; } case kExprF32ConvertF64: vcvt_f32_f64(liftoff::GetFloatRegister(dst.fp()), src.fp()); return true; case kExprF32ReinterpretI32: vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); return true; case kExprF64SConvertI32: { vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); vcvt_f64_s32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); return true; } case kExprF64UConvertI32: { vmov(liftoff::GetFloatRegister(dst.fp()), src.gp()); vcvt_f64_u32(dst.fp(), liftoff::GetFloatRegister(dst.fp())); return true; } case kExprF64ConvertF32: vcvt_f64_f32(dst.fp(), liftoff::GetFloatRegister(src.fp())); return true; case kExprF64ReinterpretI64: vmov(dst.fp(), src.low_gp(), src.high_gp()); return true; case kExprF64SConvertI64: case kExprF64UConvertI64: case kExprI64SConvertF32: case kExprI64UConvertF32: case kExprI64SConvertSatF32: case kExprI64UConvertSatF32: case kExprF32SConvertI64: case kExprF32UConvertI64: case kExprI64SConvertF64: case kExprI64UConvertF64: case kExprI64SConvertSatF64: case kExprI64UConvertSatF64: // These cases can be handled by the C fallback function. 
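      // (32-bit ARM has no instructions for i64 <-> f32/f64 conversions, so
      // returning false here makes Liftoff emit a call to the C fallback.)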
      return false;
    default:
      UNREACHABLE();
  }
}

void LiftoffAssembler::emit_i32_signextend_i8(Register dst, Register src) {
  sxtb(dst, src);
}

void LiftoffAssembler::emit_i32_signextend_i16(Register dst, Register src) {
  sxth(dst, src);
}

void LiftoffAssembler::emit_i64_signextend_i8(LiftoffRegister dst,
                                              LiftoffRegister src) {
  emit_i32_signextend_i8(dst.low_gp(), src.low_gp());
  mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
}

void LiftoffAssembler::emit_i64_signextend_i16(LiftoffRegister dst,
                                               LiftoffRegister src) {
  emit_i32_signextend_i16(dst.low_gp(), src.low_gp());
  mov(dst.high_gp(), Operand(dst.low_gp(), ASR, 31));
}

void LiftoffAssembler::emit_i64_signextend_i32(LiftoffRegister dst,
                                               LiftoffRegister src) {
  TurboAssembler::Move(dst.low_gp(), src.low_gp());
  mov(dst.high_gp(), Operand(src.low_gp(), ASR, 31));
}

void LiftoffAssembler::emit_jump(Label* label) { b(label); }

void LiftoffAssembler::emit_jump(Register target) { bx(target); }

void LiftoffAssembler::emit_cond_jump(Condition cond, Label* label,
                                      ValueType type, Register lhs,
                                      Register rhs) {
  DCHECK_EQ(type, kWasmI32);
  if (rhs == no_reg) {
    cmp(lhs, Operand(0));
  } else {
    cmp(lhs, rhs);
  }
  b(label, cond);
}

void LiftoffAssembler::emit_i32_eqz(Register dst, Register src) {
  clz(dst, src);
  mov(dst, Operand(dst, LSR, kRegSizeInBitsLog2));
}

void LiftoffAssembler::emit_i32_set_cond(Condition cond, Register dst,
                                         Register lhs, Register rhs) {
  cmp(lhs, rhs);
  mov(dst, Operand(0), LeaveCC);
  mov(dst, Operand(1), LeaveCC, cond);
}

void LiftoffAssembler::emit_i64_eqz(Register dst, LiftoffRegister src) {
  orr(dst, src.low_gp(), src.high_gp());
  clz(dst, dst);
  mov(dst, Operand(dst, LSR, 5));
}

void LiftoffAssembler::emit_i64_set_cond(Condition cond, Register dst,
                                         LiftoffRegister lhs,
                                         LiftoffRegister rhs) {
  // For signed i64 comparisons, we still need to use unsigned comparison for
  // the low word (the only bit carrying signedness information is the MSB in
  // the high word).
  Condition unsigned_cond = liftoff::MakeUnsigned(cond);
  Label set_cond;
  Label cont;
  LiftoffRegister dest = LiftoffRegister(dst);
  bool speculative_move = !dest.overlaps(lhs) && !dest.overlaps(rhs);
  if (speculative_move) {
    mov(dst, Operand(0));
  }
  // Compare high word first. If it differs, use it for the set_cond. If it's
  // equal, compare the low word and use that for set_cond.
  cmp(lhs.high_gp(), rhs.high_gp());
  if (unsigned_cond == cond) {
    cmp(lhs.low_gp(), rhs.low_gp(), kEqual);
    if (!speculative_move) {
      mov(dst, Operand(0));
    }
    mov(dst, Operand(1), LeaveCC, cond);
  } else {
    // If the condition predicate for the low word differs from that for the
    // high word, the conditional move instructions must be separated.
    b(ne, &set_cond);
    cmp(lhs.low_gp(), rhs.low_gp());
    if (!speculative_move) {
      mov(dst, Operand(0));
    }
    mov(dst, Operand(1), LeaveCC, unsigned_cond);
    b(&cont);
    bind(&set_cond);
    if (!speculative_move) {
      mov(dst, Operand(0));
    }
    mov(dst, Operand(1), LeaveCC, cond);
    bind(&cont);
  }
}

void LiftoffAssembler::emit_f32_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  VFPCompareAndSetFlags(liftoff::GetFloatRegister(lhs),
                        liftoff::GetFloatRegister(rhs));
  mov(dst, Operand(0), LeaveCC);
  mov(dst, Operand(1), LeaveCC, cond);
  if (cond != ne) {
    // If V flag set, at least one of the arguments was a NaN -> false.
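    // ({ne} is the only predicate that must be true for unordered operands,
    // so it is the one condition that skips this override.)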
    mov(dst, Operand(0), LeaveCC, vs);
  }
}

void LiftoffAssembler::emit_f64_set_cond(Condition cond, Register dst,
                                         DoubleRegister lhs,
                                         DoubleRegister rhs) {
  VFPCompareAndSetFlags(lhs, rhs);
  mov(dst, Operand(0), LeaveCC);
  mov(dst, Operand(1), LeaveCC, cond);
  if (cond != ne) {
    // If V flag set, at least one of the arguments was a NaN -> false.
    mov(dst, Operand(0), LeaveCC, vs);
  }
}

bool LiftoffAssembler::emit_select(LiftoffRegister dst, Register condition,
                                   LiftoffRegister true_value,
                                   LiftoffRegister false_value,
                                   ValueType type) {
  return false;
}

void LiftoffAssembler::LoadTransform(LiftoffRegister dst, Register src_addr,
                                     Register offset_reg, uint32_t offset_imm,
                                     LoadType type,
                                     LoadTransformationKind transform,
                                     uint32_t* protected_load_pc) {
  UseScratchRegisterScope temps(this);
  Register actual_src_addr = liftoff::CalculateActualAddress(
      this, &temps, src_addr, offset_reg, offset_imm);
  *protected_load_pc = pc_offset();
  MachineType memtype = type.mem_type();

  if (transform == LoadTransformationKind::kExtend) {
    if (memtype == MachineType::Int8()) {
      vld1(Neon8, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonS8, liftoff::GetSimd128Register(dst), dst.low_fp());
    } else if (memtype == MachineType::Uint8()) {
      vld1(Neon8, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonU8, liftoff::GetSimd128Register(dst), dst.low_fp());
    } else if (memtype == MachineType::Int16()) {
      vld1(Neon16, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonS16, liftoff::GetSimd128Register(dst), dst.low_fp());
    } else if (memtype == MachineType::Uint16()) {
      vld1(Neon16, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonU16, liftoff::GetSimd128Register(dst), dst.low_fp());
    } else if (memtype == MachineType::Int32()) {
      vld1(Neon32, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonS32, liftoff::GetSimd128Register(dst), dst.low_fp());
    } else if (memtype == MachineType::Uint32()) {
      vld1(Neon32, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      vmovl(NeonU32, liftoff::GetSimd128Register(dst), dst.low_fp());
    }
  } else if (transform == LoadTransformationKind::kZeroExtend) {
    Simd128Register dest = liftoff::GetSimd128Register(dst);
    if (memtype == MachineType::Int32()) {
      vmov(dest, 0);
      vld1s(Neon32, NeonListOperand(dst.low_fp()), 0,
            NeonMemOperand(actual_src_addr));
    } else {
      DCHECK_EQ(MachineType::Int64(), memtype);
      vmov(dest.high(), 0);
      vld1(Neon64, NeonListOperand(dest.low()),
           NeonMemOperand(actual_src_addr));
    }
  } else {
    DCHECK_EQ(LoadTransformationKind::kSplat, transform);
    if (memtype == MachineType::Int8()) {
      vld1r(Neon8, NeonListOperand(liftoff::GetSimd128Register(dst)),
            NeonMemOperand(actual_src_addr));
    } else if (memtype == MachineType::Int16()) {
      vld1r(Neon16, NeonListOperand(liftoff::GetSimd128Register(dst)),
            NeonMemOperand(actual_src_addr));
    } else if (memtype == MachineType::Int32()) {
      vld1r(Neon32, NeonListOperand(liftoff::GetSimd128Register(dst)),
            NeonMemOperand(actual_src_addr));
    } else if (memtype == MachineType::Int64()) {
      vld1(Neon32, NeonListOperand(dst.low_fp()),
           NeonMemOperand(actual_src_addr));
      TurboAssembler::Move(dst.high_fp(), dst.low_fp());
    }
  }
}

void LiftoffAssembler::emit_i8x16_swizzle(LiftoffRegister dst,
                                          LiftoffRegister lhs,
                                          LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  NeonListOperand table(liftoff::GetSimd128Register(lhs));
  if (dst == lhs) {
    // dst will be overwritten, so keep the table somewhere else.
QwNeonRegister tbl = temps.AcquireQ(); TurboAssembler::Move(tbl, liftoff::GetSimd128Register(lhs)); table = NeonListOperand(tbl); } vtbl(dst.low_fp(), table, rhs.low_fp()); vtbl(dst.high_fp(), table, rhs.high_fp()); } void LiftoffAssembler::emit_f64x2_splat(LiftoffRegister dst, LiftoffRegister src) { TurboAssembler::Move(dst.low_fp(), src.fp()); TurboAssembler::Move(dst.high_fp(), src.fp()); } void LiftoffAssembler::emit_f64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.fp(), liftoff::GetSimd128Register(lhs), imm_lane_idx); } void LiftoffAssembler::emit_f64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { ReplaceLane(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), src2.fp(), imm_lane_idx); } void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst, LiftoffRegister src) { vabs(dst.low_fp(), src.low_fp()); vabs(dst.high_fp(), src.high_fp()); } void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst, LiftoffRegister src) { vneg(dst.low_fp(), src.low_fp()); vneg(dst.high_fp(), src.high_fp()); } void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst, LiftoffRegister src) { vsqrt(dst.low_fp(), src.low_fp()); vsqrt(dst.high_fp(), src.high_fp()); } bool LiftoffAssembler::emit_f64x2_ceil(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintp(dst.low_fp(), src.low_fp()); vrintp(dst.high_fp(), src.high_fp()); return true; } bool LiftoffAssembler::emit_f64x2_floor(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintm(dst.low_fp(), src.low_fp()); vrintm(dst.high_fp(), src.high_fp()); return true; } bool LiftoffAssembler::emit_f64x2_trunc(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintz(dst.low_fp(), src.low_fp()); vrintz(dst.high_fp(), src.high_fp()); return true; } bool LiftoffAssembler::emit_f64x2_nearest_int(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintn(dst.low_fp(), src.low_fp()); vrintn(dst.high_fp(), src.high_fp()); return true; } void LiftoffAssembler::emit_f64x2_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vadd(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); vadd(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); } void LiftoffAssembler::emit_f64x2_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vsub(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); vsub(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); } void LiftoffAssembler::emit_f64x2_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmul(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); vmul(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); } void LiftoffAssembler::emit_f64x2_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vdiv(dst.low_fp(), lhs.low_fp(), rhs.low_fp()); vdiv(dst.high_fp(), lhs.high_fp(), rhs.high_fp()); } void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { Simd128Register dest = liftoff::GetSimd128Register(dst); Simd128Register left = liftoff::GetSimd128Register(lhs); Simd128Register right = liftoff::GetSimd128Register(rhs); liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), liftoff::MinOrMax::kMin); 
liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), liftoff::MinOrMax::kMin); } void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { Simd128Register dest = liftoff::GetSimd128Register(dst); Simd128Register left = liftoff::GetSimd128Register(lhs); Simd128Register right = liftoff::GetSimd128Register(rhs); liftoff::EmitFloatMinOrMax(this, dest.low(), left.low(), right.low(), liftoff::MinOrMax::kMax); liftoff::EmitFloatMinOrMax(this, dest.high(), left.high(), right.high(), liftoff::MinOrMax::kMax); } void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { QwNeonRegister dest = liftoff::GetSimd128Register(dst); QwNeonRegister left = liftoff::GetSimd128Register(lhs); QwNeonRegister right = liftoff::GetSimd128Register(rhs); if (dst != rhs) { vmov(dest, left); } VFPCompareAndSetFlags(right.low(), left.low()); vmov(dest.low(), right.low(), mi); VFPCompareAndSetFlags(right.high(), left.high()); vmov(dest.high(), right.high(), mi); } void LiftoffAssembler::emit_f64x2_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { QwNeonRegister dest = liftoff::GetSimd128Register(dst); QwNeonRegister left = liftoff::GetSimd128Register(lhs); QwNeonRegister right = liftoff::GetSimd128Register(rhs); if (dst != rhs) { vmov(dest, left); } VFPCompareAndSetFlags(right.low(), left.low()); vmov(dest.low(), right.low(), gt); VFPCompareAndSetFlags(right.high(), left.high()); vmov(dest.high(), right.high(), gt); } void LiftoffAssembler::emit_f32x4_splat(LiftoffRegister dst, LiftoffRegister src) { vdup(Neon32, liftoff::GetSimd128Register(dst), src.fp(), 0); } void LiftoffAssembler::emit_f32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(liftoff::GetFloatRegister(dst.fp()), liftoff::GetSimd128Register(lhs), imm_lane_idx); } void LiftoffAssembler::emit_f32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { ReplaceLane(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), liftoff::GetFloatRegister(src2.fp()), imm_lane_idx); } void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src) { vabs(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src) { vneg(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src) { // The list of d registers available to us is from d0 to d15, which always // maps to 2 s registers. 
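  // NEON has no vector sqrt instruction, so each of the four lanes goes
  // through the scalar VFP vsqrt via the S-register aliases of the low
  // D registers.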
LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); LowDwVfpRegister src_low = LowDwVfpRegister::from_code(src.low_fp().code()); LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); LowDwVfpRegister src_high = LowDwVfpRegister::from_code(src.high_fp().code()); vsqrt(dst_low.low(), src_low.low()); vsqrt(dst_low.high(), src_low.high()); vsqrt(dst_high.low(), src_high.low()); vsqrt(dst_high.high(), src_high.high()); } bool LiftoffAssembler::emit_f32x4_ceil(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintp(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); return true; } bool LiftoffAssembler::emit_f32x4_floor(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintm(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); return true; } bool LiftoffAssembler::emit_f32x4_trunc(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintz(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); return true; } bool LiftoffAssembler::emit_f32x4_nearest_int(LiftoffRegister dst, LiftoffRegister src) { if (!CpuFeatures::IsSupported(ARMv8)) { return false; } CpuFeatureScope scope(this, ARMv8); vrintn(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); return true; } void LiftoffAssembler::emit_f32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vadd(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vsub(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmul(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_div(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { // The list of d registers available to us is from d0 to d15, which always // maps to 2 s registers. 
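  // As with sqrt above, NEON has no vector divide (only reciprocal
  // estimates), so the division is done lane by lane on the S aliases.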
LowDwVfpRegister dst_low = LowDwVfpRegister::from_code(dst.low_fp().code()); LowDwVfpRegister lhs_low = LowDwVfpRegister::from_code(lhs.low_fp().code()); LowDwVfpRegister rhs_low = LowDwVfpRegister::from_code(rhs.low_fp().code()); LowDwVfpRegister dst_high = LowDwVfpRegister::from_code(dst.high_fp().code()); LowDwVfpRegister lhs_high = LowDwVfpRegister::from_code(lhs.high_fp().code()); LowDwVfpRegister rhs_high = LowDwVfpRegister::from_code(rhs.high_fp().code()); vdiv(dst_low.low(), lhs_low.low(), rhs_low.low()); vdiv(dst_low.high(), lhs_low.high(), rhs_low.high()); vdiv(dst_high.low(), lhs_high.low(), rhs_high.low()); vdiv(dst_high.high(), lhs_high.high(), rhs_high.high()); } void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { UseScratchRegisterScope temps(this); QwNeonRegister tmp = liftoff::GetSimd128Register(dst); if (dst == lhs || dst == rhs) { tmp = temps.AcquireQ(); } QwNeonRegister left = liftoff::GetSimd128Register(lhs); QwNeonRegister right = liftoff::GetSimd128Register(rhs); vcgt(tmp, left, right); vbsl(tmp, right, left); if (dst == lhs || dst == rhs) { vmov(liftoff::GetSimd128Register(dst), tmp); } } void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { UseScratchRegisterScope temps(this); QwNeonRegister tmp = liftoff::GetSimd128Register(dst); if (dst == lhs || dst == rhs) { tmp = temps.AcquireQ(); } QwNeonRegister left = liftoff::GetSimd128Register(lhs); QwNeonRegister right = liftoff::GetSimd128Register(rhs); vcgt(tmp, right, left); vbsl(tmp, right, left); if (dst == lhs || dst == rhs) { vmov(liftoff::GetSimd128Register(dst), tmp); } } void LiftoffAssembler::emit_i64x2_splat(LiftoffRegister dst, LiftoffRegister src) { Simd128Register dst_simd = liftoff::GetSimd128Register(dst); vdup(Neon32, dst_simd, src.low_gp()); ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 1); ReplaceLane(dst_simd, dst_simd, src.high_gp(), NeonS32, 3); } void LiftoffAssembler::emit_i64x2_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.low_gp(), liftoff::GetSimd128Register(lhs), NeonS32, imm_lane_idx * 2); ExtractLane(dst.high_gp(), liftoff::GetSimd128Register(lhs), NeonS32, imm_lane_idx * 2 + 1); } void LiftoffAssembler::emit_i64x2_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { Simd128Register dst_simd = liftoff::GetSimd128Register(dst); Simd128Register src1_simd = liftoff::GetSimd128Register(src1); ReplaceLane(dst_simd, src1_simd, src2.low_gp(), NeonS32, imm_lane_idx * 2); ReplaceLane(dst_simd, dst_simd, src2.high_gp(), NeonS32, imm_lane_idx * 2 + 1); } void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); QwNeonRegister zero = dst == src ? 
      temps.AcquireQ() : liftoff::GetSimd128Register(dst);
  vmov(zero, uint64_t{0});
  vsub(Neon64, liftoff::GetSimd128Register(dst), zero,
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  liftoff::EmitSimdShift(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shli(LiftoffRegister dst, LiftoffRegister lhs,
                                       int32_t rhs) {
  vshl(NeonS64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), rhs & 63);
}

void LiftoffAssembler::emit_i64x2_shr_s(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_s(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shr_u(LiftoffRegister dst,
                                        LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
  liftoff::EmitSimdShift(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_shri_u(LiftoffRegister dst,
                                         LiftoffRegister lhs, int32_t rhs) {
  liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs);
}

void LiftoffAssembler::emit_i64x2_add(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vadd(Neon64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_sub(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  vsub(Neon64, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
                                      LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  QwNeonRegister dst_neon = liftoff::GetSimd128Register(dst);
  QwNeonRegister left = liftoff::GetSimd128Register(lhs);
  QwNeonRegister right = liftoff::GetSimd128Register(rhs);

  // These temporary registers will be modified. We can directly modify lhs and
  // rhs if they are not used, saving on temporaries.
  QwNeonRegister tmp1 = left;
  QwNeonRegister tmp2 = right;

  LiftoffRegList used_plus_dst =
      cache_state()->used_registers | LiftoffRegList::ForRegs(dst);

  if (used_plus_dst.has(lhs) && used_plus_dst.has(rhs)) {
    tmp1 = temps.AcquireQ();
    // We only have 1 scratch Q register, so acquire another ourselves.
    LiftoffRegList pinned = LiftoffRegList::ForRegs(dst);
    LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned);
    tmp2 = liftoff::GetSimd128Register(unused_pair);
  } else if (used_plus_dst.has(lhs)) {
    tmp1 = temps.AcquireQ();
  } else if (used_plus_dst.has(rhs)) {
    tmp2 = temps.AcquireQ();
  }

  // Algorithm from code-generator-arm.cc, refer to comments there for details.
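  // In short: with 32-bit halves a = a1:a0 and b = b1:b0 per 64-bit lane,
  //   a * b mod 2^64 == ((a0 * b1 + a1 * b0) << 32) + a0 * b0.
  // vtrn gathers the low halves of both lanes into one D register and the
  // high halves into the other, so a single vmull/vmlal pair computes the
  // cross products for both lanes at once; vshl positions them, and the
  // final vmlal adds the low products.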
if (tmp1 != left) { vmov(tmp1, left); } if (tmp2 != right) { vmov(tmp2, right); } vtrn(Neon32, tmp1.low(), tmp1.high()); vtrn(Neon32, tmp2.low(), tmp2.high()); vmull(NeonU32, dst_neon, tmp1.low(), tmp2.high()); vmlal(NeonU32, dst_neon, tmp1.high(), tmp2.low()); vshl(NeonU64, dst_neon, dst_neon, 32); vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low()); } void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, LiftoffRegister src) { vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp()); } void LiftoffAssembler::emit_i32x4_extract_lane(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS32, imm_lane_idx); } void LiftoffAssembler::emit_i32x4_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { ReplaceLane(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), src2.gp(), NeonS32, imm_lane_idx); } void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src) { vneg(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_v32x4_anytrue(LiftoffRegister dst, LiftoffRegister src) { liftoff::EmitAnyTrue(this, dst, src); } void LiftoffAssembler::emit_v32x4_alltrue(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); DwVfpRegister scratch = temps.AcquireD(); vpmin(NeonU32, scratch, src.low_fp(), src.high_fp()); vpmin(NeonU32, scratch, scratch, scratch); ExtractLane(dst.gp(), scratch, NeonS32, 0); cmp(dst.gp(), Operand(0)); mov(dst.gp(), Operand(1), LeaveCC, ne); } void LiftoffAssembler::emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); Simd128Register tmp = liftoff::GetSimd128Register(src); Simd128Register mask = temps.AcquireQ(); if (cache_state()->is_used(src)) { // We only have 1 scratch Q register, so try and reuse src. LiftoffRegList pinned = LiftoffRegList::ForRegs(src); LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); mask = liftoff::GetSimd128Register(unused_pair); } vshr(NeonS32, tmp, liftoff::GetSimd128Register(src), 31); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. 
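  // The two constants below hold (1 << i) in 32-bit lane i (1, 2, 4, 8), so
  // after the AND each negative lane contributes exactly its mask bit; the
  // vpadds then sum the lanes, e.g. lanes {neg, pos, neg, neg} yield
  // 1 + 4 + 8 = 0b1101.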
vmov(mask.low(), Double((uint64_t)0x0000'0002'0000'0001)); vmov(mask.high(), Double((uint64_t)0x0000'0008'0000'0004)); vand(tmp, mask, tmp); vpadd(Neon32, tmp.low(), tmp.low(), tmp.high()); vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero); VmovLow(dst.gp(), tmp.low()); } void LiftoffAssembler::emit_i32x4_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { vshl(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), rhs & 31); } void LiftoffAssembler::emit_i32x4_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i32x4_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vadd(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vsub(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmul(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonU32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonU32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { QwNeonRegister dest = liftoff::GetSimd128Register(dst); QwNeonRegister left = liftoff::GetSimd128Register(lhs); QwNeonRegister right = liftoff::GetSimd128Register(rhs); UseScratchRegisterScope temps(this); Simd128Register scratch = temps.AcquireQ(); vmull(NeonS16, scratch, left.low(), right.low()); vpadd(Neon32, dest.low(), scratch.low(), scratch.high()); vmull(NeonS16, scratch, left.high(), right.high()); vpadd(Neon32, dest.high(), scratch.low(), scratch.high()); } void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst, LiftoffRegister src) { vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp()); } void 
LiftoffAssembler::emit_i16x8_neg(LiftoffRegister dst, LiftoffRegister src) { vneg(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_v16x8_anytrue(LiftoffRegister dst, LiftoffRegister src) { liftoff::EmitAnyTrue(this, dst, src); } void LiftoffAssembler::emit_v16x8_alltrue(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); DwVfpRegister scratch = temps.AcquireD(); vpmin(NeonU16, scratch, src.low_fp(), src.high_fp()); vpmin(NeonU16, scratch, scratch, scratch); vpmin(NeonU16, scratch, scratch, scratch); ExtractLane(dst.gp(), scratch, NeonS16, 0); cmp(dst.gp(), Operand(0)); mov(dst.gp(), Operand(1), LeaveCC, ne); } void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); Simd128Register tmp = liftoff::GetSimd128Register(src); Simd128Register mask = temps.AcquireQ(); if (cache_state()->is_used(src)) { // We only have 1 scratch Q register, so try and reuse src. LiftoffRegList pinned = LiftoffRegList::ForRegs(src); LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); mask = liftoff::GetSimd128Register(unused_pair); } vshr(NeonS16, tmp, liftoff::GetSimd128Register(src), 15); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. vmov(mask.low(), Double((uint64_t)0x0008'0004'0002'0001)); vmov(mask.high(), Double((uint64_t)0x0080'0040'0020'0010)); vand(tmp, mask, tmp); vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); vmov(NeonU16, dst.gp(), tmp.low(), 0); } void LiftoffAssembler::emit_i16x8_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { vshl(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), rhs & 15); } void LiftoffAssembler::emit_i16x8_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vadd(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_add_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqadd(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vsub(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_sub_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqsub(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); 
} void LiftoffAssembler::emit_i16x8_sub_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqsub(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmul(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_add_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqadd(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_extract_lane_u(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU16, imm_lane_idx); } void LiftoffAssembler::emit_i16x8_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS16, imm_lane_idx); } void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { ReplaceLane(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), src2.gp(), NeonS16, imm_lane_idx); } void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs, const uint8_t shuffle[16], bool is_swizzle) { Simd128Register dest = liftoff::GetSimd128Register(dst); Simd128Register src1 = liftoff::GetSimd128Register(lhs); Simd128Register src2 = liftoff::GetSimd128Register(rhs); UseScratchRegisterScope temps(this); Simd128Register scratch = temps.AcquireQ(); if ((src1 != src2) && src1.code() + 1 != src2.code()) { // vtbl requires the operands to be consecutive or the same. // If they are the same, we build a smaller list operand (table_size = 2). // If they are not the same, and not consecutive, we move the src1 and src2 // to q14 and q15, which will be unused since they are not allocatable in // Liftoff. If the operands are the same, then we build a smaller list // operand below. static_assert(!(kLiftoffAssemblerFpCacheRegs & (d28.bit() | d29.bit() | d30.bit() | d31.bit())), "This only works if q14-q15 (d28-d31) are not used."); vmov(q14, src1); src1 = q14; vmov(q15, src2); src2 = q15; } int table_size = src1 == src2 ? 2 : 4; int scratch_s_base = scratch.code() * 4; for (int j = 0; j < 4; j++) { uint32_t imm = 0; for (int i = 3; i >= 0; i--) { imm = (imm << 8) | shuffle[j * 4 + i]; } DCHECK_EQ(0, imm & (table_size == 2 ? 
0xF0F0F0F0 : 0xE0E0E0E0)); // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4. vmov(SwVfpRegister::from_code(scratch_s_base + j), Float32::FromBits(imm)); } DwVfpRegister table_base = src1.low(); NeonListOperand table(table_base, table_size); if (dest != src1 && dest != src2) { vtbl(dest.low(), table, scratch.low()); vtbl(dest.high(), table, scratch.high()); } else { vtbl(scratch.low(), table, scratch.low()); vtbl(scratch.high(), table, scratch.high()); vmov(dest, scratch); } } void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst, LiftoffRegister src) { vdup(Neon8, liftoff::GetSimd128Register(dst), src.gp()); } void LiftoffAssembler::emit_i8x16_extract_lane_u(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonU8, imm_lane_idx); } void LiftoffAssembler::emit_i8x16_extract_lane_s(LiftoffRegister dst, LiftoffRegister lhs, uint8_t imm_lane_idx) { ExtractLane(dst.gp(), liftoff::GetSimd128Register(lhs), NeonS8, imm_lane_idx); } void LiftoffAssembler::emit_i8x16_replace_lane(LiftoffRegister dst, LiftoffRegister src1, LiftoffRegister src2, uint8_t imm_lane_idx) { ReplaceLane(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), src2.gp(), NeonS8, imm_lane_idx); } void LiftoffAssembler::emit_i8x16_neg(LiftoffRegister dst, LiftoffRegister src) { vneg(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_v8x16_anytrue(LiftoffRegister dst, LiftoffRegister src) { liftoff::EmitAnyTrue(this, dst, src); } void LiftoffAssembler::emit_v8x16_alltrue(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); DwVfpRegister scratch = temps.AcquireD(); vpmin(NeonU8, scratch, src.low_fp(), src.high_fp()); vpmin(NeonU8, scratch, scratch, scratch); vpmin(NeonU8, scratch, scratch, scratch); vpmin(NeonU8, scratch, scratch, scratch); ExtractLane(dst.gp(), scratch, NeonS8, 0); cmp(dst.gp(), Operand(0)); mov(dst.gp(), Operand(1), LeaveCC, ne); } void LiftoffAssembler::emit_i8x16_bitmask(LiftoffRegister dst, LiftoffRegister src) { UseScratchRegisterScope temps(this); Simd128Register tmp = liftoff::GetSimd128Register(src); Simd128Register mask = temps.AcquireQ(); if (cache_state()->is_used(src)) { // We only have 1 scratch Q register, so try and reuse src. LiftoffRegList pinned = LiftoffRegList::ForRegs(src); LiftoffRegister unused_pair = GetUnusedRegister(kFpRegPair, pinned); mask = liftoff::GetSimd128Register(unused_pair); } vshr(NeonS8, tmp, liftoff::GetSimd128Register(src), 7); // Set i-th bit of each lane i. When AND with tmp, the lanes that // are signed will have i-th bit set, unsigned will be 0. 
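  // Both mask halves hold (1 << (i % 8)) in byte lane i. The vext/vzip below
  // pair each byte of the high half with its counterpart in the low half,
  // forming 16-bit elements (high-half bit << 8) | low-half bit, and the
  // three pairwise adds accumulate the disjoint bits into the final 16-bit
  // mask.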
vmov(mask.low(), Double((uint64_t)0x8040'2010'0804'0201)); vmov(mask.high(), Double((uint64_t)0x8040'2010'0804'0201)); vand(tmp, mask, tmp); vext(mask, tmp, tmp, 8); vzip(Neon8, mask, tmp); vpadd(Neon16, tmp.low(), tmp.low(), tmp.high()); vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); vpadd(Neon16, tmp.low(), tmp.low(), tmp.low()); vmov(NeonU16, dst.gp(), tmp.low(), 0); } void LiftoffAssembler::emit_i8x16_shl(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_shli(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { vshl(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), rhs & 7); } void LiftoffAssembler::emit_i8x16_shr_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_shri_s(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_shr_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::EmitSimdShift(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_shri_u(LiftoffRegister dst, LiftoffRegister lhs, int32_t rhs) { liftoff::EmitSimdShiftImmediate(this, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_add(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vadd(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_add_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqadd(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_sub(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vsub(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_sub_sat_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqsub(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_sub_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqsub(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_mul(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmul(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_add_sat_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vqadd(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_min_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_min_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmin(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_max_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), 
liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_max_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vmax(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_eq(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_ne(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); } void LiftoffAssembler::emit_i8x16_gt_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_gt_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_ge_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonS8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_ge_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_eq(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_ne(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); } void LiftoffAssembler::emit_i16x8_gt_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_gt_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_ge_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonS16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_ge_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonU16, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(Neon32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); vmvn(liftoff::GetSimd128Register(dst), 
liftoff::GetSimd128Register(dst)); } void LiftoffAssembler::emit_i32x4_gt_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_gt_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(NeonU32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_ge_s(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonS32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i32x4_ge_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(NeonU32, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(dst)); } void LiftoffAssembler::emit_f32x4_lt(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcgt(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), liftoff::GetSimd128Register(lhs)); } void LiftoffAssembler::emit_f32x4_le(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vcge(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(rhs), liftoff::GetSimd128Register(lhs)); } void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::F64x2Compare(this, dst, lhs, rhs, eq); } void LiftoffAssembler::emit_f64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::F64x2Compare(this, dst, lhs, rhs, ne); } void LiftoffAssembler::emit_f64x2_lt(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::F64x2Compare(this, dst, lhs, rhs, lt); } void LiftoffAssembler::emit_f64x2_le(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::F64x2Compare(this, dst, lhs, rhs, le); } void LiftoffAssembler::emit_s128_const(LiftoffRegister dst, const uint8_t imms[16]) { uint64_t vals[2]; memcpy(vals, imms, sizeof(vals)); vmov(dst.low_fp(), Double(vals[0])); vmov(dst.high_fp(), Double(vals[1])); } void LiftoffAssembler::emit_s128_not(LiftoffRegister dst, LiftoffRegister src) { vmvn(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_s128_and(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vand(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_s128_or(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vorr(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_s128_xor(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { veor(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_s128_select(LiftoffRegister dst, LiftoffRegister src1, 
LiftoffRegister src2, LiftoffRegister mask) { if (dst != mask) { vmov(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(mask)); } vbsl(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src1), liftoff::GetSimd128Register(src2)); } void LiftoffAssembler::emit_i32x4_sconvert_f32x4(LiftoffRegister dst, LiftoffRegister src) { vcvt_s32_f32(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_i32x4_uconvert_f32x4(LiftoffRegister dst, LiftoffRegister src) { vcvt_u32_f32(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_f32x4_sconvert_i32x4(LiftoffRegister dst, LiftoffRegister src) { vcvt_f32_s32(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_f32x4_uconvert_i32x4(LiftoffRegister dst, LiftoffRegister src) { vcvt_f32_u32(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(src)); } void LiftoffAssembler::emit_i8x16_sconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::S128NarrowOp(this, NeonS8, NeonS8, dst, lhs, rhs); } void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::S128NarrowOp(this, NeonU8, NeonS8, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::S128NarrowOp(this, NeonS16, NeonS16, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { liftoff::S128NarrowOp(this, NeonU16, NeonS16, dst, lhs, rhs); } void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.low_fp()); } void LiftoffAssembler::emit_i16x8_sconvert_i8x16_high(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonS8, liftoff::GetSimd128Register(dst), src.high_fp()); } void LiftoffAssembler::emit_i16x8_uconvert_i8x16_low(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.low_fp()); } void LiftoffAssembler::emit_i16x8_uconvert_i8x16_high(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonU8, liftoff::GetSimd128Register(dst), src.high_fp()); } void LiftoffAssembler::emit_i32x4_sconvert_i16x8_low(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.low_fp()); } void LiftoffAssembler::emit_i32x4_sconvert_i16x8_high(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonS16, liftoff::GetSimd128Register(dst), src.high_fp()); } void LiftoffAssembler::emit_i32x4_uconvert_i16x8_low(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.low_fp()); } void LiftoffAssembler::emit_i32x4_uconvert_i16x8_high(LiftoffRegister dst, LiftoffRegister src) { vmovl(NeonU16, liftoff::GetSimd128Register(dst), src.high_fp()); } void LiftoffAssembler::emit_s128_and_not(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vbic(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { vrhadd(NeonU8, liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs)); } void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { 
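  // vrhadd computes (a + b + 1) >> 1 per lane without intermediate overflow,
  // which is the rounding average required by the wasm avgr_u operations.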
void LiftoffAssembler::emit_i8x16_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  vrhadd(NeonU8, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i16x8_rounding_average_u(LiftoffRegister dst,
                                                     LiftoffRegister lhs,
                                                     LiftoffRegister rhs) {
  vrhadd(NeonU16, liftoff::GetSimd128Register(dst),
         liftoff::GetSimd128Register(lhs), liftoff::GetSimd128Register(rhs));
}

void LiftoffAssembler::emit_i8x16_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon8, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon16, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  vabs(Neon32, liftoff::GetSimd128Register(dst),
       liftoff::GetSimd128Register(src));
}

void LiftoffAssembler::StackCheck(Label* ool_code, Register limit_address) {
  // Load the current stack limit and branch to the out-of-line code if sp is
  // at or below it (unsigned comparison).
  ldr(limit_address, MemOperand(limit_address));
  cmp(sp, limit_address);
  b(ool_code, ls);
}

void LiftoffAssembler::CallTrapCallbackForTesting() {
  PrepareCallCFunction(0, 0);
  CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(), 0);
}

void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
  // This point in the generated wasm code must never be reached; abort with
  // {reason} if it is.
  TurboAssembler::AssertUnreachable(reason);
}

void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
  RegList core_regs = regs.GetGpList();
  if (core_regs != 0) {
    stm(db_w, sp, core_regs);
  }
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetFirstRegSet();
    DoubleRegister first = reg.fp();
    DoubleRegister last = first;
    fp_regs.clear(reg);
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetFirstRegSet();
      int code = reg.fp().code();
      // vstm can not store more than 16 registers, and the register list must
      // be consecutive. Stop the current batch as soon as either condition
      // would be violated.
      if ((code != last.code() + 1) || ((code - first.code() + 1) > 16)) break;
      last = reg.fp();
      fp_regs.clear(reg);
    }
    vstm(db_w, sp, first, last);
  }
}

void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
  LiftoffRegList fp_regs = regs & kFpCacheRegList;
  while (!fp_regs.is_empty()) {
    LiftoffRegister reg = fp_regs.GetLastRegSet();
    DoubleRegister last = reg.fp();
    DoubleRegister first = last;
    fp_regs.clear(reg);
    while (!fp_regs.is_empty()) {
      LiftoffRegister reg = fp_regs.GetLastRegSet();
      int code = reg.fp().code();
      if ((code != first.code() - 1) || ((last.code() - code + 1) > 16)) break;
      first = reg.fp();
      fp_regs.clear(reg);
    }
    vldm(ia_w, sp, first, last);
  }
  RegList core_regs = regs.GetGpList();
  if (core_regs != 0) {
    ldm(ia_w, sp, core_regs);
  }
}

void LiftoffAssembler::DropStackSlotsAndRet(uint32_t num_stack_slots) {
  Drop(num_stack_slots);
  Ret();
}
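// C fallback calls: rather than marshalling wasm values into the native C
// calling convention, {CallC} spills every argument into a buffer freshly
// allocated on the machine stack and passes a single pointer to that buffer
// in r0. The buffer also doubles as the slot for a potential out argument:
//
//   sp -> | arg 0 / out arg |
//         | arg 1           |
//         | ...             |   <- stack_bytes in total
//
// After the call returns, a register result is moved out of r0, and an out
// argument, if any, is read back from the start of the buffer.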
void LiftoffAssembler::CallC(const wasm::FunctionSig* sig,
                             const LiftoffRegister* args,
                             const LiftoffRegister* rets,
                             ValueType out_argument_type, int stack_bytes,
                             ExternalReference ext_ref) {
  // Arguments are passed by pushing them all to the stack and then passing
  // a pointer to them.
  DCHECK(IsAligned(stack_bytes, kSystemPointerSize));
  // Reserve space on the stack.
  AllocateStackSpace(stack_bytes);

  int arg_bytes = 0;
  for (ValueType param_type : sig->parameters()) {
    switch (param_type.kind()) {
      case ValueType::kI32:
        str(args->gp(), MemOperand(sp, arg_bytes));
        break;
      case ValueType::kI64:
        str(args->low_gp(), MemOperand(sp, arg_bytes));
        str(args->high_gp(), MemOperand(sp, arg_bytes + kSystemPointerSize));
        break;
      case ValueType::kF32:
        vstr(liftoff::GetFloatRegister(args->fp()), MemOperand(sp, arg_bytes));
        break;
      case ValueType::kF64:
        vstr(args->fp(), MemOperand(sp, arg_bytes));
        break;
      case ValueType::kS128:
        vstr(args->low_fp(), MemOperand(sp, arg_bytes));
        vstr(args->high_fp(),
             MemOperand(sp, arg_bytes + 2 * kSystemPointerSize));
        break;
      default:
        UNREACHABLE();
    }
    args++;
    arg_bytes += param_type.element_size_bytes();
  }
  DCHECK_LE(arg_bytes, stack_bytes);

  // Pass a pointer to the buffer with the arguments to the C function.
  mov(r0, sp);

  // Now call the C function.
  constexpr int kNumCCallArgs = 1;
  PrepareCallCFunction(kNumCCallArgs);
  CallCFunction(ext_ref, kNumCCallArgs);

  // Move return value to the right register.
  const LiftoffRegister* result_reg = rets;
  if (sig->return_count() > 0) {
    DCHECK_EQ(1, sig->return_count());
    constexpr Register kReturnReg = r0;
    if (kReturnReg != rets->gp()) {
      Move(*rets, LiftoffRegister(kReturnReg), sig->GetReturn(0));
    }
    result_reg++;
  }

  // Load potential output value from the buffer on the stack.
  if (out_argument_type != kWasmStmt) {
    switch (out_argument_type.kind()) {
      case ValueType::kI32:
        ldr(result_reg->gp(), MemOperand(sp));
        break;
      case ValueType::kI64:
        ldr(result_reg->low_gp(), MemOperand(sp));
        ldr(result_reg->high_gp(), MemOperand(sp, kSystemPointerSize));
        break;
      case ValueType::kF32:
        vldr(liftoff::GetFloatRegister(result_reg->fp()), MemOperand(sp));
        break;
      case ValueType::kF64:
        vldr(result_reg->fp(), MemOperand(sp));
        break;
      case ValueType::kS128:
        vld1(Neon8, NeonListOperand(result_reg->low_fp(), 2),
             NeonMemOperand(sp));
        break;
      default:
        UNREACHABLE();
    }
  }
  add(sp, sp, Operand(stack_bytes));
}

void LiftoffAssembler::CallNativeWasmCode(Address addr) {
  Call(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::TailCallNativeWasmCode(Address addr) {
  Jump(addr, RelocInfo::WASM_CALL);
}

void LiftoffAssembler::CallIndirect(const wasm::FunctionSig* sig,
                                    compiler::CallDescriptor* call_descriptor,
                                    Register target) {
  DCHECK(target != no_reg);
  Call(target);
}

void LiftoffAssembler::TailCallIndirect(Register target) {
  DCHECK(target != no_reg);
  Jump(target);
}

void LiftoffAssembler::CallRuntimeStub(WasmCode::RuntimeStubId sid) {
  // A direct call to a wasm runtime stub defined in this module.
  // Just encode the stub index. This will be patched at relocation.
  Call(static_cast<Address>(sid), RelocInfo::WASM_STUB_CALL);
}
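// {AllocateStackSlot} hands out the address of a freshly reserved scratch
// area on the machine stack; callers are expected to release it again via
// {DeallocateStackSlot} with the same size.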
void LiftoffAssembler::AllocateStackSlot(Register addr, uint32_t size) {
  AllocateStackSpace(size);
  mov(addr, sp);
}

void LiftoffAssembler::DeallocateStackSlot(uint32_t size) {
  add(sp, sp, Operand(size));
}

void LiftoffStackSlots::Construct() {
  for (auto& slot : slots_) {
    const LiftoffAssembler::VarState& src = slot.src_;
    switch (src.loc()) {
      case LiftoffAssembler::VarState::kStack: {
        switch (src.type().kind()) {
          // i32 and i64 are treated alike here: an i64 has already been split
          // into two word-sized half slots, so each half is pushed as one
          // word, just like an i32.
          case ValueType::kI32:
          case ValueType::kI64:
          case ValueType::kF32: {
            UseScratchRegisterScope temps(asm_);
            Register scratch = temps.Acquire();
            asm_->ldr(scratch,
                      liftoff::GetHalfStackSlot(slot.src_offset_, slot.half_));
            asm_->Push(scratch);
          } break;
          case ValueType::kF64: {
            UseScratchRegisterScope temps(asm_);
            DwVfpRegister scratch = temps.AcquireD();
            asm_->vldr(scratch, liftoff::GetStackSlot(slot.src_offset_));
            asm_->vpush(scratch);
          } break;
          case ValueType::kS128: {
            MemOperand mem_op = liftoff::GetStackSlot(slot.src_offset_);
            UseScratchRegisterScope temps(asm_);
            Register addr = liftoff::CalculateActualAddress(
                asm_, &temps, mem_op.rn(), no_reg, mem_op.offset());
            QwNeonRegister scratch = temps.AcquireQ();
            asm_->vld1(Neon8, NeonListOperand(scratch), NeonMemOperand(addr));
            asm_->vpush(scratch);
            break;
          }
          default:
            UNREACHABLE();
        }
        break;
      }
      case LiftoffAssembler::VarState::kRegister:
        switch (src.type().kind()) {
          case ValueType::kI64: {
            LiftoffRegister reg =
                slot.half_ == kLowWord ? src.reg().low() : src.reg().high();
            asm_->push(reg.gp());
          } break;
          case ValueType::kI32:
            asm_->push(src.reg().gp());
            break;
          case ValueType::kF32:
            asm_->vpush(liftoff::GetFloatRegister(src.reg().fp()));
            break;
          case ValueType::kF64:
            asm_->vpush(src.reg().fp());
            break;
          case ValueType::kS128:
            asm_->vpush(liftoff::GetSimd128Register(src.reg()));
            break;
          default:
            UNREACHABLE();
        }
        break;
      case LiftoffAssembler::VarState::kIntConst: {
        DCHECK(src.type() == kWasmI32 || src.type() == kWasmI64);
        UseScratchRegisterScope temps(asm_);
        Register scratch = temps.Acquire();
        // The high word is the sign extension of the low word.
        asm_->mov(scratch, Operand(slot.half_ == kLowWord
                                       ? src.i32_const()
                                       : src.i32_const() >> 31));
        asm_->push(scratch);
        break;
      }
    }
  }
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_BASELINE_ARM_LIFTOFF_ASSEMBLER_ARM_H_