// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/code-generator.h"

#include "src/assembler-inl.h"
#include "src/callable.h"
#include "src/compiler/code-generator-impl.h"
#include "src/compiler/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/frame-constants.h"
#include "src/frames.h"
#include "src/heap/heap-inl.h"
#include "src/ia32/assembler-ia32.h"
#include "src/ia32/macro-assembler-ia32.h"
#include "src/optimized-compilation-info.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->
#define kScratchDoubleReg xmm0

// Adds IA-32 specific methods for decoding operands.
class IA32OperandConverter : public InstructionOperandConverter {
 public:
  IA32OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    if (op->IsRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToRegister(op));
    } else if (op->IsFPRegister()) {
      DCHECK_EQ(0, extra);
      return Operand(ToDoubleRegister(op));
    }
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    return Operand(offset.from_stack_pointer() ? esp : ebp,
                   offset.offset() + extra);
  }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kInt32 &&
        RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(static_cast<int32_t>(constant.ToInt32()),
                       constant.rmode());
    }
    switch (constant.type()) {
      case Constant::kInt32:
        return Immediate(constant.ToInt32());
      case Constant::kFloat32:
        return Immediate::EmbeddedNumber(constant.ToFloat32());
      case Constant::kFloat64:
        return Immediate::EmbeddedNumber(constant.ToFloat64().value());
      case Constant::kExternalReference:
        return Immediate(constant.ToExternalReference());
      case Constant::kHeapObject:
        return Immediate(constant.ToHeapObject());
      case Constant::kInt64:
        break;
      case Constant::kRpoNumber:
        return Immediate::CodeRelativeOffset(ToLabel(operand));
    }
    UNREACHABLE();
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_M1:
      case kMode_M2:
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
      }
      case kMode_MI: {
        Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
        return Operand(ctant.ToInt32(), ctant.rmode());
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }

  Operand NextMemoryOperand(size_t offset = 0) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    Register base = InputRegister(NextOffset(&offset));
    const int32_t disp = 4;
    if (mode == kMode_MR1) {
      Register index = InputRegister(NextOffset(&offset));
      ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
      return Operand(base, index, scale, disp);
    } else if (mode == kMode_MRI) {
      Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
      return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
    } else {
      UNREACHABLE();
    }
  }
};

namespace {

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    __ sub(esp, Immediate(kDoubleSize));
    __ movsd(MemOperand(esp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched at relocation.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        zone_(gen->zone()) {}

  void SaveRegisters(RegList registers) {
    DCHECK_LT(0, NumRegs(registers));
    for (int i = 0; i < Register::kNumRegisters; ++i) {
      if ((registers >> i) & 1u) {
        __ push(Register::from_code(i));
      }
    }
  }

  void RestoreRegisters(RegList registers) {
    DCHECK_LT(0, NumRegs(registers));
    for (int i = Register::kNumRegisters - 1; i >= 0; --i) {
      if ((registers >> i) & 1u) {
        __ pop(Register::from_code(i));
      }
    }
  }

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                           save_fp_mode);
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  Zone* zone_;
};

void MoveOperandIfAliasedWithPoisonRegister(Instruction* call_instruction,
                                            CodeGenerator* gen) {
  IA32OperandConverter i(gen, call_instruction);
  int const poison_index = i.InputInt32(1);
  if (poison_index == -1) {
    // No aliasing -> nothing to move.
return; } InstructionOperand* op = call_instruction->InputAt(poison_index); if (op->IsImmediate() || op->IsConstant()) { gen->tasm()->mov(kSpeculationPoisonRegister, i.ToImmediate(op)); } else { gen->tasm()->mov(kSpeculationPoisonRegister, i.InputOperand(poison_index)); } } void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen, InstructionCode opcode, Instruction* instr, IA32OperandConverter& i) { const MemoryAccessMode access_mode = static_cast(MiscField::decode(opcode)); if (access_mode == kMemoryAccessPoisoned) { Register value = i.OutputRegister(); codegen->tasm()->and_(value, kSpeculationPoisonRegister); } } } // namespace #define ASSEMBLE_COMPARE(asm_instr) \ do { \ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ size_t index = 0; \ Operand left = i.MemoryOperand(&index); \ if (HasImmediateInput(instr, index)) { \ __ asm_instr(left, i.InputImmediate(index)); \ } else { \ __ asm_instr(left, i.InputRegister(index)); \ } \ } else { \ if (HasImmediateInput(instr, 1)) { \ if (instr->InputAt(0)->IsRegister()) { \ __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ } else { \ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ } \ } else { \ if (instr->InputAt(1)->IsRegister()) { \ __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ } else { \ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ } \ } \ } \ } while (0) #define ASSEMBLE_IEEE754_BINOP(name) \ do { \ /* Pass two doubles as arguments on the stack. */ \ __ PrepareCallCFunction(4, eax); \ __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); \ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \ /* Return value is in st(0) on ia32. */ \ /* Store it into the result register. */ \ __ sub(esp, Immediate(kDoubleSize)); \ __ fstp_d(Operand(esp, 0)); \ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ __ add(esp, Immediate(kDoubleSize)); \ } while (false) #define ASSEMBLE_IEEE754_UNOP(name) \ do { \ /* Pass one double as argument on the stack. */ \ __ PrepareCallCFunction(2, eax); \ __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \ /* Return value is in st(0) on ia32. */ \ /* Store it into the result register. 
*/ \ __ sub(esp, Immediate(kDoubleSize)); \ __ fstp_d(Operand(esp, 0)); \ __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ __ add(esp, Immediate(kDoubleSize)); \ } while (false) #define ASSEMBLE_BINOP(asm_instr) \ do { \ if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ size_t index = 1; \ Operand right = i.MemoryOperand(&index); \ __ asm_instr(i.InputRegister(0), right); \ } else { \ if (HasImmediateInput(instr, 1)) { \ __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ } else { \ __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ } \ } \ } while (0) #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \ do { \ Label binop; \ __ bind(&binop); \ __ mov_inst(eax, i.MemoryOperand(1)); \ __ Move(i.TempRegister(0), eax); \ __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \ __ lock(); \ __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \ __ j(not_equal, &binop); \ } while (false) #define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \ do { \ Label binop; \ __ bind(&binop); \ __ mov(i.OutputRegister(0), i.MemoryOperand(2)); \ __ mov(i.OutputRegister(1), i.NextMemoryOperand(2)); \ __ push(i.InputRegister(0)); \ __ push(i.InputRegister(1)); \ __ instr1(i.InputRegister(0), i.OutputRegister(0)); \ __ instr2(i.InputRegister(1), i.OutputRegister(1)); \ __ lock(); \ __ cmpxchg8b(i.MemoryOperand(2)); \ __ pop(i.InputRegister(1)); \ __ pop(i.InputRegister(0)); \ __ j(not_equal, &binop); \ } while (false); #define ASSEMBLE_MOVX(mov_instr) \ do { \ if (instr->addressing_mode() != kMode_None) { \ __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \ } else if (instr->InputAt(0)->IsRegister()) { \ __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \ } else { \ __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \ } \ } while (0) #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \ do { \ XMMRegister src0 = i.InputSimd128Register(0); \ Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \ if (CpuFeatures::IsSupported(AVX)) { \ CpuFeatureScope avx_scope(tasm(), AVX); \ __ v##opcode(i.OutputSimd128Register(), src0, src1); \ } else { \ DCHECK_EQ(i.OutputSimd128Register(), src0); \ __ opcode(i.OutputSimd128Register(), src1); \ } \ } while (false) #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \ if (CpuFeatures::IsSupported(AVX)) { \ CpuFeatureScope avx_scope(tasm(), AVX); \ __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \ i.InputOperand(1), imm); \ } else { \ CpuFeatureScope sse_scope(tasm(), SSELevel); \ DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \ __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \ } void CodeGenerator::AssembleDeconstructFrame() { __ mov(esp, ebp); __ pop(ebp); } void CodeGenerator::AssemblePrepareTailCall() { if (frame_access_state()->has_frame()) { __ mov(ebp, MemOperand(ebp, 0)); } frame_access_state()->SetFrameAccessToSP(); } void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg, Register, Register, Register) { // There are not enough temp registers left on ia32 for a call instruction // so we pick some scratch registers and save/restore them manually here. int scratch_count = 3; Register scratch1 = ebx; Register scratch2 = ecx; Register scratch3 = edx; DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3)); Label done; // Check if current frame is an arguments adaptor frame. 
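  // For regular frames this slot holds the context; adaptor frames store a
  // frame-type marker (a Smi, never a valid context pointer) there instead,
  // so comparing against the marker is an unambiguous test.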
__ cmp(Operand(ebp, StandardFrameConstants::kContextOffset), Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR))); __ j(not_equal, &done, Label::kNear); __ push(scratch1); __ push(scratch2); __ push(scratch3); // Load arguments count from current arguments adaptor frame (note, it // does not include receiver). Register caller_args_count_reg = scratch1; __ mov(caller_args_count_reg, Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset)); __ SmiUntag(caller_args_count_reg); ParameterCount callee_args_count(args_reg); __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2, scratch3, scratch_count); __ pop(scratch3); __ pop(scratch2); __ pop(scratch1); __ bind(&done); } namespace { void AdjustStackPointerForTailCall(TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp, bool allow_shrinkage = true) { int current_sp_offset = state->GetSPToFPSlotCount() + StandardFrameConstants::kFixedSlotCountAboveFp; int stack_slot_delta = new_slot_above_sp - current_sp_offset; if (stack_slot_delta > 0) { tasm->sub(esp, Immediate(stack_slot_delta * kPointerSize)); state->IncreaseSPDelta(stack_slot_delta); } else if (allow_shrinkage && stack_slot_delta < 0) { tasm->add(esp, Immediate(-stack_slot_delta * kPointerSize)); state->IncreaseSPDelta(stack_slot_delta); } } } // namespace void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr, int first_unused_stack_slot) { CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush); ZoneVector pushes(zone()); GetPushCompatibleMoves(instr, flags, &pushes); if (!pushes.empty() && (LocationOperand::cast(pushes.back()->destination()).index() + 1 == first_unused_stack_slot)) { IA32OperandConverter g(this, instr); for (auto move : pushes) { LocationOperand destination_location( LocationOperand::cast(move->destination())); InstructionOperand source(move->source()); AdjustStackPointerForTailCall(tasm(), frame_access_state(), destination_location.index()); if (source.IsStackSlot()) { LocationOperand source_location(LocationOperand::cast(source)); __ push(g.SlotToOperand(source_location.index())); } else if (source.IsRegister()) { LocationOperand source_location(LocationOperand::cast(source)); __ push(source_location.GetRegister()); } else if (source.IsImmediate()) { __ push(Immediate(ImmediateOperand::cast(source).inline_value())); } else { // Pushes of non-scalar data types is not supported. UNIMPLEMENTED(); } frame_access_state()->IncreaseSPDelta(1); move->Eliminate(); } } AdjustStackPointerForTailCall(tasm(), frame_access_state(), first_unused_stack_slot, false); } void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr, int first_unused_stack_slot) { AdjustStackPointerForTailCall(tasm(), frame_access_state(), first_unused_stack_slot); } // Check that {kJavaScriptCallCodeStartRegister} is correct. void CodeGenerator::AssembleCodeStartRegisterCheck() { __ push(eax); // Push eax so we can use it as a scratch register. __ ComputeCodeStartAddress(eax); __ cmp(eax, kJavaScriptCallCodeStartRegister); __ Assert(equal, AbortReason::kWrongFunctionCodeStart); __ pop(eax); // Restore eax. } // Check if the code object is marked for deoptimization. If it is, then it // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need // to: // 1. read from memory the word that contains that bit, which can be found in // the flags in the referenced {CodeDataContainer} object; // 2. test kMarkedForDeoptimizationBit in those flags; and // 3. if it is not zero then it jumps to the builtin. 
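// Note: the mark lives in the CodeDataContainer rather than in the Code
// object itself, which is presumably what allows it to be flipped without
// writing to the (potentially write-protected) code object.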
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ mov(ebx, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(ebx, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // Ensure we're not serializing (otherwise we'd need to use an indirection
  // to access the builtin below).
  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
  Handle<Code> code = isolate()->builtins()->builtin_handle(
      Builtins::kCompileLazyDeoptimizedCode);
  __ j(not_zero, code, RelocInfo::CODE_TARGET);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(eax);
  __ mov(kSpeculationPoisonRegister, Immediate(0));
  __ cmp(kJavaScriptCallCodeStartRegister, eax);
  __ mov(eax, Immediate(-1));
  __ cmov(equal, kSpeculationPoisonRegister, eax);
  __ pop(eax);  // Restore eax.
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ and_(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ and_(kContextRegister, kSpeculationPoisonRegister);
  __ and_(esp, kSpeculationPoisonRegister);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  IA32OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      MoveOperandIfAliasedWithPoisonRegister(instr, this);
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>
(constant.ToInt32()); if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) { __ wasm_call(wasm_code, constant.rmode()); } else { if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { __ RetpolineCall(wasm_code, constant.rmode()); } else { __ call(wasm_code, constant.rmode()); } } } else { Register reg = i.InputRegister(0); if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { __ RetpolineCall(reg); } else { __ call(reg); } } RecordCallPosition(instr); frame_access_state()->ClearSPDelta(); break; } case kArchTailCallCodeObjectFromJSFunction: case kArchTailCallCodeObject: { MoveOperandIfAliasedWithPoisonRegister(instr, this); if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) { AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister, no_reg, no_reg, no_reg); } if (HasImmediateInput(instr, 0)) { Handle code = i.InputCode(0); __ jmp(code, RelocInfo::CODE_TARGET); } else { Register reg = i.InputRegister(0); DCHECK_IMPLIES( HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), reg == kJavaScriptCallCodeStartRegister); __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag)); if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { __ RetpolineJump(reg); } else { __ jmp(reg); } } frame_access_state()->ClearSPDelta(); frame_access_state()->SetFrameAccessToDefault(); break; } case kArchTailCallWasm: { MoveOperandIfAliasedWithPoisonRegister(instr, this); if (HasImmediateInput(instr, 0)) { Constant constant = i.ToConstant(instr->InputAt(0)); Address wasm_code = static_cast
<Address>
(constant.ToInt32()); __ jmp(wasm_code, constant.rmode()); } else { Register reg = i.InputRegister(0); if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { __ RetpolineJump(reg); } else { __ jmp(reg); } } frame_access_state()->ClearSPDelta(); frame_access_state()->SetFrameAccessToDefault(); break; } case kArchTailCallAddress: { MoveOperandIfAliasedWithPoisonRegister(instr, this); CHECK(!HasImmediateInput(instr, 0)); Register reg = i.InputRegister(0); DCHECK_IMPLIES( HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister), reg == kJavaScriptCallCodeStartRegister); if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) { __ RetpolineJump(reg); } else { __ jmp(reg); } frame_access_state()->ClearSPDelta(); frame_access_state()->SetFrameAccessToDefault(); break; } case kArchCallJSFunction: { MoveOperandIfAliasedWithPoisonRegister(instr, this); Register func = i.InputRegister(0); if (FLAG_debug_code) { // Check the function's context matches the context argument. __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset)); __ Assert(equal, AbortReason::kWrongFunctionContext); } static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch"); __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset)); __ add(ecx, Immediate(Code::kHeaderSize - kHeapObjectTag)); __ call(ecx); RecordCallPosition(instr); frame_access_state()->ClearSPDelta(); break; } case kArchPrepareCallCFunction: { // Frame alignment requires using FP-relative frame addressing. frame_access_state()->SetFrameAccessToFP(); int const num_parameters = MiscField::decode(instr->opcode()); __ PrepareCallCFunction(num_parameters, i.TempRegister(0)); break; } case kArchSaveCallerRegisters: { fp_mode_ = static_cast(MiscField::decode(instr->opcode())); DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); // kReturnRegister0 should have been saved before entering the stub. int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0); DCHECK_EQ(0, bytes % kPointerSize); DCHECK_EQ(0, frame_access_state()->sp_delta()); frame_access_state()->IncreaseSPDelta(bytes / kPointerSize); DCHECK(!caller_registers_saved_); caller_registers_saved_ = true; break; } case kArchRestoreCallerRegisters: { DCHECK(fp_mode_ == static_cast(MiscField::decode(instr->opcode()))); DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs); // Don't overwrite the returned value. int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0); frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize)); DCHECK_EQ(0, frame_access_state()->sp_delta()); DCHECK(caller_registers_saved_); caller_registers_saved_ = false; break; } case kArchPrepareTailCall: AssemblePrepareTailCall(); break; case kArchCallCFunction: { MoveOperandIfAliasedWithPoisonRegister(instr, this); int const num_parameters = MiscField::decode(instr->opcode()); if (HasImmediateInput(instr, 0)) { ExternalReference ref = i.InputExternalReference(0); __ CallCFunction(ref, num_parameters); } else { Register func = i.InputRegister(0); __ CallCFunction(func, num_parameters); } frame_access_state()->SetFrameAccessToDefault(); // Ideally, we should decrement SP delta to match the change of stack // pointer in CallCFunction. However, for certain architectures (e.g. // ARM), there may be more strict alignment requirement, causing old SP // to be saved on the stack. In those cases, we can not calculate the SP // delta statically. frame_access_state()->ClearSPDelta(); if (caller_registers_saved_) { // Need to re-sync SP delta introduced in kArchSaveCallerRegisters. 
// Here, we assume the sequence to be: // kArchSaveCallerRegisters; // kArchCallCFunction; // kArchRestoreCallerRegisters; int bytes = __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0); frame_access_state()->IncreaseSPDelta(bytes / kPointerSize); } break; } case kArchJmp: AssembleArchJump(i.InputRpo(0)); break; case kArchBinarySearchSwitch: AssembleArchBinarySearchSwitch(instr); break; case kArchLookupSwitch: AssembleArchLookupSwitch(instr); break; case kArchTableSwitch: AssembleArchTableSwitch(instr); break; case kArchComment: __ RecordComment(reinterpret_cast(i.InputInt32(0))); break; case kArchDebugAbort: DCHECK(i.InputRegister(0) == edx); if (!frame_access_state()->has_frame()) { // We don't actually want to generate a pile of code for this, so just // claim there is a stack frame, without generating one. FrameScope scope(tasm(), StackFrame::NONE); __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS), RelocInfo::CODE_TARGET); } else { __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS), RelocInfo::CODE_TARGET); } __ int3(); break; case kArchDebugBreak: __ int3(); break; case kArchNop: case kArchThrowTerminator: // don't emit code for nops. break; case kArchDeoptimize: { int deopt_state_id = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); CodeGenResult result = AssembleDeoptimizerCall(deopt_state_id, current_source_position_); if (result != kSuccess) return result; break; } case kArchRet: AssembleReturn(instr->InputAt(0)); break; case kArchStackPointer: __ mov(i.OutputRegister(), esp); break; case kArchFramePointer: __ mov(i.OutputRegister(), ebp); break; case kArchParentFramePointer: if (frame_access_state()->has_frame()) { __ mov(i.OutputRegister(), Operand(ebp, 0)); } else { __ mov(i.OutputRegister(), ebp); } break; case kArchTruncateDoubleToI: { auto result = i.OutputRegister(); auto input = i.InputDoubleRegister(0); auto ool = new (zone()) OutOfLineTruncateDoubleToI( this, result, input, DetermineStubCallMode()); __ cvttsd2si(result, Operand(input)); __ cmp(result, 1); __ j(overflow, ool->entry()); __ bind(ool->exit()); break; } case kArchStoreWithWriteBarrier: { RecordWriteMode mode = static_cast(MiscField::decode(instr->opcode())); Register object = i.InputRegister(0); size_t index = 0; Operand operand = i.MemoryOperand(&index); Register value = i.InputRegister(index); Register scratch0 = i.TempRegister(0); Register scratch1 = i.TempRegister(1); auto ool = new (zone()) OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1, mode); __ mov(operand, value); __ CheckPageFlag(object, scratch0, MemoryChunk::kPointersFromHereAreInterestingMask, not_zero, ool->entry()); __ bind(ool->exit()); break; } case kArchStackSlot: { FrameOffset offset = frame_access_state()->GetFrameOffset(i.InputInt32(0)); Register base = offset.from_stack_pointer() ? 
esp : ebp; __ lea(i.OutputRegister(), Operand(base, offset.offset())); break; } case kIeee754Float64Acos: ASSEMBLE_IEEE754_UNOP(acos); break; case kIeee754Float64Acosh: ASSEMBLE_IEEE754_UNOP(acosh); break; case kIeee754Float64Asin: ASSEMBLE_IEEE754_UNOP(asin); break; case kIeee754Float64Asinh: ASSEMBLE_IEEE754_UNOP(asinh); break; case kIeee754Float64Atan: ASSEMBLE_IEEE754_UNOP(atan); break; case kIeee754Float64Atanh: ASSEMBLE_IEEE754_UNOP(atanh); break; case kIeee754Float64Atan2: ASSEMBLE_IEEE754_BINOP(atan2); break; case kIeee754Float64Cbrt: ASSEMBLE_IEEE754_UNOP(cbrt); break; case kIeee754Float64Cos: ASSEMBLE_IEEE754_UNOP(cos); break; case kIeee754Float64Cosh: ASSEMBLE_IEEE754_UNOP(cosh); break; case kIeee754Float64Expm1: ASSEMBLE_IEEE754_UNOP(expm1); break; case kIeee754Float64Exp: ASSEMBLE_IEEE754_UNOP(exp); break; case kIeee754Float64Log: ASSEMBLE_IEEE754_UNOP(log); break; case kIeee754Float64Log1p: ASSEMBLE_IEEE754_UNOP(log1p); break; case kIeee754Float64Log2: ASSEMBLE_IEEE754_UNOP(log2); break; case kIeee754Float64Log10: ASSEMBLE_IEEE754_UNOP(log10); break; case kIeee754Float64Pow: { // TODO(bmeurer): Improve integration of the stub. if (i.InputDoubleRegister(1) != xmm2) { __ movaps(xmm2, i.InputDoubleRegister(0)); __ movaps(xmm1, i.InputDoubleRegister(1)); } else { __ movaps(xmm0, i.InputDoubleRegister(0)); __ movaps(xmm1, xmm2); __ movaps(xmm2, xmm0); } __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET); __ movaps(i.OutputDoubleRegister(), xmm3); break; } case kIeee754Float64Sin: ASSEMBLE_IEEE754_UNOP(sin); break; case kIeee754Float64Sinh: ASSEMBLE_IEEE754_UNOP(sinh); break; case kIeee754Float64Tan: ASSEMBLE_IEEE754_UNOP(tan); break; case kIeee754Float64Tanh: ASSEMBLE_IEEE754_UNOP(tanh); break; case kIA32Add: ASSEMBLE_BINOP(add); break; case kIA32And: ASSEMBLE_BINOP(and_); break; case kIA32Cmp: ASSEMBLE_COMPARE(cmp); break; case kIA32Cmp16: ASSEMBLE_COMPARE(cmpw); break; case kIA32Cmp8: ASSEMBLE_COMPARE(cmpb); break; case kIA32Test: ASSEMBLE_COMPARE(test); break; case kIA32Test16: ASSEMBLE_COMPARE(test_w); break; case kIA32Test8: ASSEMBLE_COMPARE(test_b); break; case kIA32Imul: if (HasImmediateInput(instr, 1)) { __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1)); } else { __ imul(i.OutputRegister(), i.InputOperand(1)); } break; case kIA32ImulHigh: __ imul(i.InputRegister(1)); break; case kIA32UmulHigh: __ mul(i.InputRegister(1)); break; case kIA32Idiv: __ cdq(); __ idiv(i.InputOperand(1)); break; case kIA32Udiv: __ Move(edx, Immediate(0)); __ div(i.InputOperand(1)); break; case kIA32Not: __ not_(i.OutputOperand()); break; case kIA32Neg: __ neg(i.OutputOperand()); break; case kIA32Or: ASSEMBLE_BINOP(or_); break; case kIA32Xor: ASSEMBLE_BINOP(xor_); break; case kIA32Sub: ASSEMBLE_BINOP(sub); break; case kIA32Shl: if (HasImmediateInput(instr, 1)) { __ shl(i.OutputOperand(), i.InputInt5(1)); } else { __ shl_cl(i.OutputOperand()); } break; case kIA32Shr: if (HasImmediateInput(instr, 1)) { __ shr(i.OutputOperand(), i.InputInt5(1)); } else { __ shr_cl(i.OutputOperand()); } break; case kIA32Sar: if (HasImmediateInput(instr, 1)) { __ sar(i.OutputOperand(), i.InputInt5(1)); } else { __ sar_cl(i.OutputOperand()); } break; case kIA32AddPair: { // i.OutputRegister(0) == i.InputRegister(0) ... left low word. // i.InputRegister(1) ... left high word. // i.InputRegister(2) ... right low word. // i.InputRegister(3) ... right high word. 
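      // This implements a 64-bit add out of 32-bit halves: low words combine
      // with add, high words with adc so the carry propagates (e.g.
      // 0xFFFFFFFF + 1 in the low words carries a 1 into the high words).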
bool use_temp = false; if (i.OutputRegister(0).code() == i.InputRegister(1).code() || i.OutputRegister(0).code() == i.InputRegister(3).code()) { // We cannot write to the output register directly, because it would // overwrite an input for adc. We have to use the temp register. use_temp = true; __ Move(i.TempRegister(0), i.InputRegister(0)); __ add(i.TempRegister(0), i.InputRegister(2)); } else { __ add(i.OutputRegister(0), i.InputRegister(2)); } if (i.OutputRegister(1).code() != i.InputRegister(1).code()) { __ Move(i.OutputRegister(1), i.InputRegister(1)); } __ adc(i.OutputRegister(1), Operand(i.InputRegister(3))); if (use_temp) { __ Move(i.OutputRegister(0), i.TempRegister(0)); } break; } case kIA32SubPair: { // i.OutputRegister(0) == i.InputRegister(0) ... left low word. // i.InputRegister(1) ... left high word. // i.InputRegister(2) ... right low word. // i.InputRegister(3) ... right high word. bool use_temp = false; if (i.OutputRegister(0).code() == i.InputRegister(1).code() || i.OutputRegister(0).code() == i.InputRegister(3).code()) { // We cannot write to the output register directly, because it would // overwrite an input for adc. We have to use the temp register. use_temp = true; __ Move(i.TempRegister(0), i.InputRegister(0)); __ sub(i.TempRegister(0), i.InputRegister(2)); } else { __ sub(i.OutputRegister(0), i.InputRegister(2)); } if (i.OutputRegister(1).code() != i.InputRegister(1).code()) { __ Move(i.OutputRegister(1), i.InputRegister(1)); } __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3))); if (use_temp) { __ Move(i.OutputRegister(0), i.TempRegister(0)); } break; } case kIA32MulPair: { __ imul(i.OutputRegister(1), i.InputOperand(0)); __ mov(i.TempRegister(0), i.InputOperand(1)); __ imul(i.TempRegister(0), i.InputOperand(2)); __ add(i.OutputRegister(1), i.TempRegister(0)); __ mov(i.OutputRegister(0), i.InputOperand(0)); // Multiplies the low words and stores them in eax and edx. __ mul(i.InputRegister(2)); __ add(i.OutputRegister(1), i.TempRegister(0)); break; } case kIA32ShlPair: if (HasImmediateInput(instr, 2)) { __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); } else { // Shift has been loaded into CL by the register allocator. __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0)); } break; case kIA32ShrPair: if (HasImmediateInput(instr, 2)) { __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); } else { // Shift has been loaded into CL by the register allocator. __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0)); } break; case kIA32SarPair: if (HasImmediateInput(instr, 2)) { __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2)); } else { // Shift has been loaded into CL by the register allocator. 
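      // (ia32 variable-count shifts can only take their count in CL, hence
      // the fixed register.)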
__ SarPair_cl(i.InputRegister(1), i.InputRegister(0)); } break; case kIA32Ror: if (HasImmediateInput(instr, 1)) { __ ror(i.OutputOperand(), i.InputInt5(1)); } else { __ ror_cl(i.OutputOperand()); } break; case kIA32Lzcnt: __ Lzcnt(i.OutputRegister(), i.InputOperand(0)); break; case kIA32Tzcnt: __ Tzcnt(i.OutputRegister(), i.InputOperand(0)); break; case kIA32Popcnt: __ Popcnt(i.OutputRegister(), i.InputOperand(0)); break; case kIA32Bswap: __ bswap(i.OutputRegister()); break; case kArchWordPoisonOnSpeculation: DCHECK_EQ(i.OutputRegister(), i.InputRegister(0)); __ and_(i.InputRegister(0), kSpeculationPoisonRegister); break; case kLFence: __ lfence(); break; case kSSEFloat32Cmp: __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat32Add: __ addss(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat32Sub: __ subss(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat32Mul: __ mulss(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat32Div: __ divss(i.InputDoubleRegister(0), i.InputOperand(1)); // Don't delete this mov. It may improve performance on some CPUs, // when there is a (v)mulss depending on the result. __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); break; case kSSEFloat32Sqrt: __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEFloat32Abs: { // TODO(bmeurer): Use 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrlq(kScratchDoubleReg, 33); __ andps(i.OutputDoubleRegister(), kScratchDoubleReg); break; } case kSSEFloat32Neg: { // TODO(bmeurer): Use 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 31); __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg); break; } case kSSEFloat32Round: { CpuFeatureScope sse_scope(tasm(), SSE4_1); RoundingMode const mode = static_cast(MiscField::decode(instr->opcode())); __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); break; } case kSSEFloat64Cmp: __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat64Add: __ addsd(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat64Sub: __ subsd(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat64Mul: __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1)); break; case kSSEFloat64Div: __ divsd(i.InputDoubleRegister(0), i.InputOperand(1)); // Don't delete this mov. It may improve performance on some CPUs, // when there is a (v)mulsd depending on the result. 
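      // (The self-move is reportedly enough to break the false output
      // dependency that some microarchitectures track for the divide's
      // destination register.)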
__ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); break; case kSSEFloat32Max: { Label compare_nan, compare_swap, done_compare; if (instr->InputAt(1)->IsFPRegister()) { __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); } auto ool = new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); __ j(parity_even, ool->entry()); __ j(above, &done_compare, Label::kNear); __ j(below, &compare_swap, Label::kNear); __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0)); __ test(i.TempRegister(0), Immediate(1)); __ j(zero, &done_compare, Label::kNear); __ bind(&compare_swap); if (instr->InputAt(1)->IsFPRegister()) { __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ movss(i.InputDoubleRegister(0), i.InputOperand(1)); } __ bind(&done_compare); __ bind(ool->exit()); break; } case kSSEFloat64Max: { Label compare_nan, compare_swap, done_compare; if (instr->InputAt(1)->IsFPRegister()) { __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); } auto ool = new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); __ j(parity_even, ool->entry()); __ j(above, &done_compare, Label::kNear); __ j(below, &compare_swap, Label::kNear); __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0)); __ test(i.TempRegister(0), Immediate(1)); __ j(zero, &done_compare, Label::kNear); __ bind(&compare_swap); if (instr->InputAt(1)->IsFPRegister()) { __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ movsd(i.InputDoubleRegister(0), i.InputOperand(1)); } __ bind(&done_compare); __ bind(ool->exit()); break; } case kSSEFloat32Min: { Label compare_swap, done_compare; if (instr->InputAt(1)->IsFPRegister()) { __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1)); } auto ool = new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister()); __ j(parity_even, ool->entry()); __ j(below, &done_compare, Label::kNear); __ j(above, &compare_swap, Label::kNear); if (instr->InputAt(1)->IsFPRegister()) { __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1)); } else { __ movss(kScratchDoubleReg, i.InputOperand(1)); __ movmskps(i.TempRegister(0), kScratchDoubleReg); } __ test(i.TempRegister(0), Immediate(1)); __ j(zero, &done_compare, Label::kNear); __ bind(&compare_swap); if (instr->InputAt(1)->IsFPRegister()) { __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ movss(i.InputDoubleRegister(0), i.InputOperand(1)); } __ bind(&done_compare); __ bind(ool->exit()); break; } case kSSEFloat64Min: { Label compare_swap, done_compare; if (instr->InputAt(1)->IsFPRegister()) { __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); } auto ool = new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister()); __ j(parity_even, ool->entry()); __ j(below, &done_compare, Label::kNear); __ j(above, &compare_swap, Label::kNear); if (instr->InputAt(1)->IsFPRegister()) { __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1)); } else { __ movsd(kScratchDoubleReg, i.InputOperand(1)); __ movmskpd(i.TempRegister(0), kScratchDoubleReg); } __ test(i.TempRegister(0), Immediate(1)); __ j(zero, &done_compare, Label::kNear); __ bind(&compare_swap); if (instr->InputAt(1)->IsFPRegister()) { __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); } else { 
__ movsd(i.InputDoubleRegister(0), i.InputOperand(1)); } __ bind(&done_compare); __ bind(ool->exit()); break; } case kSSEFloat64Mod: { // TODO(dcarney): alignment is wrong. __ sub(esp, Immediate(kDoubleSize)); // Move values to st(0) and st(1). __ movsd(Operand(esp, 0), i.InputDoubleRegister(1)); __ fld_d(Operand(esp, 0)); __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); __ fld_d(Operand(esp, 0)); // Loop while fprem isn't done. Label mod_loop; __ bind(&mod_loop); // This instructions traps on all kinds inputs, but we are assuming the // floating point control word is set to ignore them all. __ fprem(); // The following 2 instruction implicitly use eax. __ fnstsw_ax(); __ sahf(); __ j(parity_even, &mod_loop); // Move output to stack and clean up. __ fstp(1); __ fstp_d(Operand(esp, 0)); __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); __ add(esp, Immediate(kDoubleSize)); break; } case kSSEFloat64Abs: { // TODO(bmeurer): Use 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrlq(kScratchDoubleReg, 1); __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg); break; } case kSSEFloat64Neg: { // TODO(bmeurer): Use 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); break; } case kSSEFloat64Sqrt: __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEFloat64Round: { CpuFeatureScope sse_scope(tasm(), SSE4_1); RoundingMode const mode = static_cast(MiscField::decode(instr->opcode())); __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode); break; } case kSSEFloat32ToFloat64: __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEFloat64ToFloat32: __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEFloat32ToInt32: __ cvttss2si(i.OutputRegister(), i.InputOperand(0)); break; case kSSEFloat32ToUint32: __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); break; case kSSEFloat64ToInt32: __ cvttsd2si(i.OutputRegister(), i.InputOperand(0)); break; case kSSEFloat64ToUint32: __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg); break; case kSSEInt32ToFloat32: __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEUint32ToFloat32: __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0), i.TempRegister(0)); break; case kSSEInt32ToFloat64: __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEUint32ToFloat64: __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kSSEFloat64ExtractLowWord32: if (instr->InputAt(0)->IsFPStackSlot()) { __ mov(i.OutputRegister(), i.InputOperand(0)); } else { __ movd(i.OutputRegister(), i.InputDoubleRegister(0)); } break; case kSSEFloat64ExtractHighWord32: if (instr->InputAt(0)->IsFPStackSlot()) { __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2)); } else { __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1); } break; case kSSEFloat64InsertLowWord32: __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true); break; case kSSEFloat64InsertHighWord32: __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true); break; case kSSEFloat64LoadLowWord32: __ movd(i.OutputDoubleRegister(), i.InputOperand(0)); break; case kAVXFloat32Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat32Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vsubss(i.OutputDoubleRegister(), 
i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat32Mul: { CpuFeatureScope avx_scope(tasm(), AVX); __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat32Div: { CpuFeatureScope avx_scope(tasm(), AVX); __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); // Don't delete this mov. It may improve performance on some CPUs, // when there is a (v)mulss depending on the result. __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); break; } case kAVXFloat64Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat64Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat64Mul: { CpuFeatureScope avx_scope(tasm(), AVX); __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); break; } case kAVXFloat64Div: { CpuFeatureScope avx_scope(tasm(), AVX); __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), i.InputOperand(1)); // Don't delete this mov. It may improve performance on some CPUs, // when there is a (v)mulsd depending on the result. __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister()); break; } case kAVXFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrlq(kScratchDoubleReg, 33); CpuFeatureScope avx_scope(tasm(), AVX); __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); break; } case kAVXFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 31); CpuFeatureScope avx_scope(tasm(), AVX); __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); break; } case kAVXFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrlq(kScratchDoubleReg, 1); CpuFeatureScope avx_scope(tasm(), AVX); __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); break; } case kAVXFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
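      // pcmpeqd of a register with itself produces all-ones; shifting each
      // lane left by 63 leaves only the sign bit set, so the xor below
      // flips the sign of the input.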
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); CpuFeatureScope avx_scope(tasm(), AVX); __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0)); break; } case kSSEFloat64SilenceNaN: __ xorpd(kScratchDoubleReg, kScratchDoubleReg); __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg); break; case kIA32Movsxbl: ASSEMBLE_MOVX(movsx_b); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kIA32Movzxbl: ASSEMBLE_MOVX(movzx_b); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kIA32Movb: { size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { __ mov_b(operand, i.InputInt8(index)); } else { __ mov_b(operand, i.InputRegister(index)); } EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; } case kIA32Movsxwl: ASSEMBLE_MOVX(movsx_w); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kIA32Movzxwl: ASSEMBLE_MOVX(movzx_w); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; case kIA32Movw: { size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { __ mov_w(operand, i.InputInt16(index)); } else { __ mov_w(operand, i.InputRegister(index)); } EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; } case kIA32Movl: if (instr->HasOutput()) { __ mov(i.OutputRegister(), i.MemoryOperand()); EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); } else { size_t index = 0; Operand operand = i.MemoryOperand(&index); if (HasImmediateInput(instr, index)) { __ mov(operand, i.InputImmediate(index)); } else { __ mov(operand, i.InputRegister(index)); } } break; case kIA32Movsd: if (instr->HasOutput()) { __ movsd(i.OutputDoubleRegister(), i.MemoryOperand()); } else { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ movsd(operand, i.InputDoubleRegister(index)); } break; case kIA32Movss: if (instr->HasOutput()) { __ movss(i.OutputDoubleRegister(), i.MemoryOperand()); } else { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ movss(operand, i.InputDoubleRegister(index)); } break; case kIA32Movdqu: if (instr->HasOutput()) { __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand()); } else { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ Movdqu(operand, i.InputSimd128Register(index)); } break; case kIA32BitcastFI: if (instr->InputAt(0)->IsFPStackSlot()) { __ mov(i.OutputRegister(), i.InputOperand(0)); } else { __ movd(i.OutputRegister(), i.InputDoubleRegister(0)); } break; case kIA32BitcastIF: if (instr->InputAt(0)->IsRegister()) { __ movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ movss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kIA32Lea: { AddressingMode mode = AddressingModeField::decode(instr->opcode()); // Shorten "leal" to "addl", "subl" or "shll" if the register allocation // and addressing mode just happens to work out. The "addl"/"subl" forms // in these cases are faster based on measurements. 
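      // E.g. "lea eax, [eax + ecx]" becomes "add eax, ecx", and
      // "lea eax, [eax * 2]" becomes "shl eax, 1".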
if (mode == kMode_MI) { __ Move(i.OutputRegister(), Immediate(i.InputInt32(0))); } else if (i.InputRegister(0) == i.OutputRegister()) { if (mode == kMode_MRI) { int32_t constant_summand = i.InputInt32(1); if (constant_summand > 0) { __ add(i.OutputRegister(), Immediate(constant_summand)); } else if (constant_summand < 0) { __ sub(i.OutputRegister(), Immediate(-constant_summand)); } } else if (mode == kMode_MR1) { if (i.InputRegister(1) == i.OutputRegister()) { __ shl(i.OutputRegister(), 1); } else { __ add(i.OutputRegister(), i.InputRegister(1)); } } else if (mode == kMode_M2) { __ shl(i.OutputRegister(), 1); } else if (mode == kMode_M4) { __ shl(i.OutputRegister(), 2); } else if (mode == kMode_M8) { __ shl(i.OutputRegister(), 3); } else { __ lea(i.OutputRegister(), i.MemoryOperand()); } } else if (mode == kMode_MR1 && i.InputRegister(1) == i.OutputRegister()) { __ add(i.OutputRegister(), i.InputRegister(0)); } else { __ lea(i.OutputRegister(), i.MemoryOperand()); } break; } case kIA32PushFloat32: if (instr->InputAt(0)->IsFPRegister()) { __ sub(esp, Immediate(kFloatSize)); __ movss(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize); } else if (HasImmediateInput(instr, 0)) { __ Move(kScratchDoubleReg, i.InputFloat32(0)); __ sub(esp, Immediate(kFloatSize)); __ movss(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize); } else { __ movss(kScratchDoubleReg, i.InputOperand(0)); __ sub(esp, Immediate(kFloatSize)); __ movss(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize); } break; case kIA32PushFloat64: if (instr->InputAt(0)->IsFPRegister()) { __ sub(esp, Immediate(kDoubleSize)); __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize); } else if (HasImmediateInput(instr, 0)) { __ Move(kScratchDoubleReg, i.InputDouble(0)); __ sub(esp, Immediate(kDoubleSize)); __ movsd(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize); } else { __ movsd(kScratchDoubleReg, i.InputOperand(0)); __ sub(esp, Immediate(kDoubleSize)); __ movsd(Operand(esp, 0), kScratchDoubleReg); frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize); } break; case kIA32PushSimd128: if (instr->InputAt(0)->IsFPRegister()) { __ sub(esp, Immediate(kSimd128Size)); __ movups(Operand(esp, 0), i.InputSimd128Register(0)); } else { __ movups(kScratchDoubleReg, i.InputOperand(0)); __ sub(esp, Immediate(kSimd128Size)); __ movups(Operand(esp, 0), kScratchDoubleReg); } frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize); break; case kIA32Push: if (AddressingModeField::decode(instr->opcode()) != kMode_None) { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ push(operand); frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize); } else if (instr->InputAt(0)->IsFPRegister()) { __ sub(esp, Immediate(kFloatSize)); __ movsd(Operand(esp, 0), i.InputDoubleRegister(0)); frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize); } else if (HasImmediateInput(instr, 0)) { __ push(i.InputImmediate(0)); frame_access_state()->IncreaseSPDelta(1); } else { __ push(i.InputOperand(0)); frame_access_state()->IncreaseSPDelta(1); } break; case kIA32Poke: { int slot = MiscField::decode(instr->opcode()); if (HasImmediateInput(instr, 0)) { __ mov(Operand(esp, slot * kPointerSize), i.InputImmediate(0)); } else { __ mov(Operand(esp, slot * 
kPointerSize), i.InputRegister(0)); } break; } case kIA32Peek: { int reverse_slot = i.InputInt32(0) + 1; int offset = FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot); if (instr->OutputAt(0)->IsFPRegister()) { LocationOperand* op = LocationOperand::cast(instr->OutputAt(0)); if (op->representation() == MachineRepresentation::kFloat64) { __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset)); } else { DCHECK_EQ(MachineRepresentation::kFloat32, op->representation()); __ movss(i.OutputFloatRegister(), Operand(ebp, offset)); } } else { __ mov(i.OutputRegister(), Operand(ebp, offset)); } break; } case kSSEF32x4Splat: { DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); XMMRegister dst = i.OutputSimd128Register(); __ shufps(dst, dst, 0x0); break; } case kAVXF32x4Splat: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src = i.InputFloatRegister(0); __ vshufps(i.OutputSimd128Register(), src, src, 0x0); break; } case kSSEF32x4ExtractLane: { DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); XMMRegister dst = i.OutputFloatRegister(); int8_t lane = i.InputInt8(1); if (lane != 0) { DCHECK_LT(lane, 4); __ shufps(dst, dst, lane); } break; } case kAVXF32x4ExtractLane: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputFloatRegister(); XMMRegister src = i.InputSimd128Register(0); int8_t lane = i.InputInt8(1); if (lane == 0) { if (dst != src) __ vmovaps(dst, src); } else { DCHECK_LT(lane, 4); __ vshufps(dst, src, src, lane); } break; } case kSSEF32x4ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ insertps(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1) << 4); break; } case kAVXF32x4ReplaceLane: { CpuFeatureScope avx_scope(tasm(), AVX); __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(2), i.InputInt8(1) << 4); break; } case kIA32F32x4SConvertI32x4: { __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kSSEF32x4UConvertI32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits __ psubd(dst, kScratchDoubleReg); // get hi 16 bits __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly __ psrld(dst, 1); // divide by 2 to get in unsigned range __ cvtdq2ps(dst, dst); // convert hi exactly __ addps(dst, dst); // double hi, exactly __ addps(dst, kScratchDoubleReg); // add hi and lo, may round. break; } case kAVXF32x4UConvertI32x4: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); // zeros __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src, 0x55); // get lo 16 bits __ vpsubd(dst, src, kScratchDoubleReg); // get hi 16 bits __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly __ vpsrld(dst, dst, 1); // divide by 2 to get in unsigned range __ vcvtdq2ps(dst, dst); // convert hi exactly __ vaddps(dst, dst, dst); // double hi, exactly __ vaddps(dst, dst, kScratchDoubleReg); // add hi and lo, may round. 
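      // The halving is what keeps this correct: cvtdq2ps treats its input
      // as signed, so the high bits are divided by two first to stay in
      // signed int32 range, converted exactly, then doubled again.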
break; } case kSSEF32x4Abs: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrld(kScratchDoubleReg, 1); __ andps(dst, kScratchDoubleReg); } else { __ pcmpeqd(dst, dst); __ psrld(dst, 1); __ andps(dst, src); } break; } case kAVXF32x4Abs: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); __ vandps(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0)); break; } case kSSEF32x4Neg: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ pslld(kScratchDoubleReg, 31); __ xorps(dst, kScratchDoubleReg); } else { __ pcmpeqd(dst, dst); __ pslld(dst, 31); __ xorps(dst, src); } break; } case kAVXF32x4Neg: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31); __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0)); break; } case kIA32F32x4RecipApprox: { __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kIA32F32x4RecipSqrtApprox: { __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kSSEF32x4Add: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ addps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4AddHoriz: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE3); __ haddps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4AddHoriz: { CpuFeatureScope avx_scope(tasm(), AVX); __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Sub: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ subps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Mul: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ mulps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Mul: { CpuFeatureScope avx_scope(tasm(), AVX); __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Min: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ minps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Min: { CpuFeatureScope avx_scope(tasm(), AVX); __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Max: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ maxps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Max: { CpuFeatureScope avx_scope(tasm(), AVX); __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Eq: { CpuFeatureScope avx_scope(tasm(), AVX); __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0), 
i.InputOperand(1)); break; } case kSSEF32x4Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Ne: { CpuFeatureScope avx_scope(tasm(), AVX); __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Lt: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Lt: { CpuFeatureScope avx_scope(tasm(), AVX); __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEF32x4Le: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXF32x4Le: { CpuFeatureScope avx_scope(tasm(), AVX); __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kIA32I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); __ Pshufd(dst, dst, 0x0); break; } case kIA32I32x4ExtractLane: { __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI32x4ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); break; } case kAVXI32x4ReplaceLane: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(2), i.InputInt8(1)); break; } case kSSEI32x4SConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); // NAN->0 __ movaps(kScratchDoubleReg, dst); __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); __ pand(dst, kScratchDoubleReg); // Set top bit if >= 0 (but not -0.0!) __ pxor(kScratchDoubleReg, dst); // Convert __ cvttps2dq(dst, dst); // Set top bit if >=0 is now < 0 __ pand(kScratchDoubleReg, dst); __ psrad(kScratchDoubleReg, 31); // Set positive overflow lanes to 0x7FFFFFFF __ pxor(dst, kScratchDoubleReg); break; } case kAVXI32x4SConvertF32x4: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); // NAN->0 __ vcmpeqps(kScratchDoubleReg, src, src); __ vpand(dst, src, kScratchDoubleReg); // Set top bit if >= 0 (but not -0.0!) 
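// (kScratchDoubleReg is all-ones in the non-NaN lanes at this point, so
// xor-ing in dst leaves its top bit set exactly where dst's sign bit is
// clear.)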
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst); // Convert __ vcvttps2dq(dst, dst); // Set top bit if >=0 is now < 0 __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst); __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31); // Set positive overflow lanes to 0x7FFFFFFF __ vpxor(dst, dst, kScratchDoubleReg); break; } case kIA32I32x4SConvertI16x8Low: { __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kIA32I32x4SConvertI16x8High: { XMMRegister dst = i.OutputSimd128Register(); __ Palignr(dst, i.InputOperand(0), 8); __ Pmovsxwd(dst, dst); break; } case kIA32I32x4Neg: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ Psignd(dst, kScratchDoubleReg); } else { __ Pxor(dst, dst); __ Psubd(dst, src); } break; } case kSSEI32x4Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI32x4Shl: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI32x4ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI32x4ShrS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI32x4Add: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4AddHoriz: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSSE3); __ phaddd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4AddHoriz: { CpuFeatureScope avx_scope(tasm(), AVX); __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4Sub: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4Mul: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pmulld(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4Mul: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4MinS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pminsd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4MinS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4MaxS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4MaxS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ 
pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4Eq: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1)); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); break; } case kAVXI32x4Ne: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(), kScratchDoubleReg); break; } case kSSEI32x4GtS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4GtS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4GeS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminsd(dst, src); __ pcmpeqd(dst, src); break; } case kAVXI32x4GeS: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminsd(kScratchDoubleReg, src1, src2); __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kSSEI32x4UConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); // NAN->0, negative->0 __ pxor(kScratchDoubleReg, kScratchDoubleReg); __ maxps(dst, kScratchDoubleReg); // scratch: float representation of max_signed __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrld(kScratchDoubleReg, 1); // 0x7fffffff __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 // tmp: convert (src-max_signed). // Positive overflow lanes -> 0x7FFFFFFF // Negative lanes -> 0 __ movaps(tmp, dst); __ subps(tmp, kScratchDoubleReg); __ cmpleps(kScratchDoubleReg, tmp); __ cvttps2dq(tmp, tmp); __ pxor(tmp, kScratchDoubleReg); __ pxor(kScratchDoubleReg, kScratchDoubleReg); __ pmaxsd(tmp, kScratchDoubleReg); // convert. Overflow lanes above max_signed will be 0x80000000 __ cvttps2dq(dst, dst); // Add (src-max_signed) for overflow lanes. __ paddd(dst, tmp); break; } case kAVXI32x4UConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); // NAN->0, negative->0 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vmaxps(dst, dst, kScratchDoubleReg); // scratch: float representation of max_signed __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); // 0x7fffffff __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 // tmp: convert (src-max_signed). 
// Positive overflow lanes -> 0x7FFFFFFF // Negative lanes -> 0 __ vsubps(tmp, dst, kScratchDoubleReg); __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp); __ vcvttps2dq(tmp, tmp); __ vpxor(tmp, tmp, kScratchDoubleReg); __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpmaxsd(tmp, tmp, kScratchDoubleReg); // convert. Overflow lanes above max_signed will be 0x80000000 __ vcvttps2dq(dst, dst); // Add (src-max_signed) for overflow lanes. __ vpaddd(dst, dst, tmp); break; } case kIA32I32x4UConvertI16x8Low: { __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kIA32I32x4UConvertI16x8High: { XMMRegister dst = i.OutputSimd128Register(); __ Palignr(dst, i.InputOperand(0), 8); __ Pmovzxwd(dst, dst); break; } case kSSEI32x4ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI32x4ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI32x4MinU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pminud(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4MinU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4MaxU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI32x4MaxU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI32x4GtU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pmaxud(dst, src); __ pcmpeqd(dst, src); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ pxor(dst, kScratchDoubleReg); break; } case kAVXI32x4GtU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpmaxud(kScratchDoubleReg, src1, src2); __ vpcmpeqd(dst, kScratchDoubleReg, src2); __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(dst, dst, kScratchDoubleReg); break; } case kSSEI32x4GeU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminud(dst, src); __ pcmpeqd(dst, src); break; } case kAVXI32x4GeU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminud(kScratchDoubleReg, src1, src2); __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kIA32I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); __ Pshuflw(dst, dst, 0x0); __ Pshufd(dst, dst, 0x0); break; } case kIA32I16x8ExtractLane: { Register dst = i.OutputRegister(); __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1)); __ movsx_w(dst, dst); break; } case kSSEI16x8ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); break; } case kAVXI16x8ReplaceLane: { CpuFeatureScope 
avx_scope(tasm(), AVX); __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(2), i.InputInt8(1)); break; } case kIA32I16x8SConvertI8x16Low: { __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kIA32I16x8SConvertI8x16High: { XMMRegister dst = i.OutputSimd128Register(); __ Palignr(dst, i.InputOperand(0), 8); __ Pmovsxbw(dst, dst); break; } case kIA32I16x8Neg: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ Psignw(dst, kScratchDoubleReg); } else { __ Pxor(dst, dst); __ Psubw(dst, src); } break; } case kSSEI16x8Shl: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI16x8Shl: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI16x8ShrS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI16x8ShrS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI16x8SConvertI32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ packssdw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8SConvertI32x4: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8Add: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8AddSaturateS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddsw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8AddSaturateS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8AddHoriz: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSSE3); __ phaddw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8AddHoriz: { CpuFeatureScope avx_scope(tasm(), AVX); __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8Sub: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8SubSaturateS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubsw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8SubSaturateS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8Mul: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pmullw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8Mul: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8MinS: { 
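// Unlike the I32x4 min/max above, pminsw/pmaxsw are plain SSE2 instructions,
// so no SSE4_1 CpuFeatureScope is required for the I16x8 signed min/max.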
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pminsw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8MinS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8MaxS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8MaxS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8Eq: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1)); __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); break; } case kAVXI16x8Ne: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(), kScratchDoubleReg); break; } case kSSEI16x8GtS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8GtS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8GeS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminsw(dst, src); __ pcmpeqw(dst, src); break; } case kAVXI16x8GeS: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminsw(kScratchDoubleReg, src1, src2); __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kIA32I16x8UConvertI8x16Low: { __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0)); break; } case kIA32I16x8UConvertI8x16High: { XMMRegister dst = i.OutputSimd128Register(); __ Palignr(dst, i.InputOperand(0), 8); __ Pmovzxbw(dst, dst); break; } case kSSEI16x8ShrU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); break; } case kAVXI16x8ShrU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputInt8(1)); break; } case kSSEI16x8UConvertI32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); // Change negative lanes to 0x7FFFFFFF __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psrld(kScratchDoubleReg, 1); __ pminud(dst, kScratchDoubleReg); __ pminud(kScratchDoubleReg, i.InputOperand(1)); __ packusdw(dst, kScratchDoubleReg); break; } case kAVXI16x8UConvertI32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); // Change negative lanes to 0x7FFFFFFF __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ 
vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1); __ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0)); __ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1)); __ vpackusdw(dst, dst, kScratchDoubleReg); break; } case kSSEI16x8AddSaturateU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddusw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8AddSaturateU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8SubSaturateU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubusw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8SubSaturateU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8MinU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pminuw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8MinU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8MaxU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI16x8MaxU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI16x8GtU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pmaxuw(dst, src); __ pcmpeqw(dst, src); __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ pxor(dst, kScratchDoubleReg); break; } case kAVXI16x8GtU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpmaxuw(kScratchDoubleReg, src1, src2); __ vpcmpeqw(dst, kScratchDoubleReg, src2); __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(dst, dst, kScratchDoubleReg); break; } case kSSEI16x8GeU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminuw(dst, src); __ pcmpeqw(dst, src); break; } case kAVXI16x8GeU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminuw(kScratchDoubleReg, src1, src2); __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kIA32I8x16Splat: { XMMRegister dst = i.OutputSimd128Register(); __ Movd(dst, i.InputOperand(0)); __ Pxor(kScratchDoubleReg, kScratchDoubleReg); __ Pshufb(dst, kScratchDoubleReg); break; } case kIA32I8x16ExtractLane: { Register dst = i.OutputRegister(); __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1)); __ movsx_b(dst, dst); break; } case kSSEI8x16ReplaceLane: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1)); break; } case kAVXI8x16ReplaceLane: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0), 
i.InputOperand(2), i.InputInt8(1)); break; } case kSSEI8x16SConvertI16x8: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ packsswb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16SConvertI16x8: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kIA32I8x16Neg: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ Psignb(dst, kScratchDoubleReg); } else { __ Pxor(dst, dst); __ Psubb(dst, src); } break; } case kSSEI8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); int8_t shift = i.InputInt8(1) & 0x7; if (shift < 4) { // For small shifts, doubling is faster. for (int i = 0; i < shift; ++i) { __ paddb(dst, dst); } } else { // Mask off the unwanted bits before word-shifting. __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ psrlw(kScratchDoubleReg, 8 + shift); __ packuswb(kScratchDoubleReg, kScratchDoubleReg); __ pand(dst, kScratchDoubleReg); __ psllw(dst, shift); } break; } case kAVXI8x16Shl: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); int8_t shift = i.InputInt8(1) & 0x7; if (shift < 4) { // For small shifts, doubling is faster. for (int i = 0; i < shift; ++i) { __ vpaddb(dst, src, src); src = dst; } } else { // Mask off the unwanted bits before word-shifting. __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift); __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpand(dst, src, kScratchDoubleReg); __ vpsllw(dst, dst, shift); } break; } case kIA32I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); int8_t shift = i.InputInt8(1) & 0x7; // Unpack the bytes into words, do arithmetic shifts, and repack. 
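// After the unpack each source byte b occupies the high byte of a 16-bit
// lane, so psraw by (8 + shift) both sign-extends b and applies the byte
// shift; packsswb then narrows the words back to bytes without saturating,
// since every shifted value already fits in an int8.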
__ Punpckhbw(kScratchDoubleReg, src); __ Punpcklbw(dst, src); __ Psraw(kScratchDoubleReg, 8 + shift); __ Psraw(dst, 8 + shift); __ Packsswb(dst, kScratchDoubleReg); break; } case kSSEI8x16Add: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16Add: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16AddSaturateS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddsb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16AddSaturateS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16Sub: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16Sub: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16SubSaturateS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubsb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16SubSaturateS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16Mul: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister right = i.InputSimd128Register(1); XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa // right= BBbb BBbb ... BBbb BBbb // t = 00AA 00AA ... 00AA 00AA // s = 00BB 00BB ... 00BB 00BB __ movaps(tmp, dst); __ movaps(kScratchDoubleReg, right); __ psrlw(tmp, 8); __ psrlw(kScratchDoubleReg, 8); // dst = left * 256 __ psllw(dst, 8); // t = I16x8Mul(t, s) // => __PP __PP ... __PP __PP __ pmullw(tmp, kScratchDoubleReg); // dst = I16x8Mul(left * 256, right) // => pp__ pp__ ... pp__ pp__ __ pmullw(dst, right); // t = I16x8Shl(t, 8) // => PP00 PP00 ... PP00 PP00 __ psllw(tmp, 8); // dst = I16x8Shr(dst, 8) // => 00pp 00pp ... 00pp 00pp __ psrlw(dst, 8); // dst = I16x8Or(dst, t) // => PPpp PPpp ... PPpp PPpp __ por(dst, tmp); break; } case kAVXI8x16Mul: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister left = i.InputSimd128Register(0); XMMRegister right = i.InputSimd128Register(1); XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa // right= BBbb BBbb ... BBbb BBbb // t = 00AA 00AA ... 00AA 00AA // s = 00BB 00BB ... 00BB 00BB __ vpsrlw(tmp, left, 8); __ vpsrlw(kScratchDoubleReg, right, 8); // t = I16x8Mul(t0, t1) // => __PP __PP ... __PP __PP __ vpmullw(tmp, tmp, kScratchDoubleReg); // s = left * 256 __ vpsllw(kScratchDoubleReg, left, 8); // dst = I16x8Mul(left * 256, right) // => pp__ pp__ ... pp__ pp__ __ vpmullw(dst, kScratchDoubleReg, right); // dst = I16x8Shr(dst, 8) // => 00pp 00pp ... 00pp 00pp __ vpsrlw(dst, dst, 8); // t = I16x8Shl(t, 8) // => PP00 PP00 ... PP00 PP00 __ vpsllw(tmp, tmp, 8); // dst = I16x8Or(dst, t) // => PPpp PPpp ... 
PPpp PPpp __ vpor(dst, dst, tmp); break; } case kSSEI8x16MinS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pminsb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16MinS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16MaxS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16MaxS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16Eq: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16Ne: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1)); __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); break; } case kAVXI8x16Ne: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(), kScratchDoubleReg); break; } case kSSEI8x16GtS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16GtS: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16GeS: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminsb(dst, src); __ pcmpeqb(dst, src); break; } case kAVXI8x16GeS: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminsb(kScratchDoubleReg, src1, src2); __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kSSEI8x16UConvertI16x8: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); // Change negative lanes to 0x7FFF __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); __ psrlw(kScratchDoubleReg, 1); __ pminuw(dst, kScratchDoubleReg); __ pminuw(kScratchDoubleReg, i.InputOperand(1)); __ packuswb(dst, kScratchDoubleReg); break; } case kAVXI8x16UConvertI16x8: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); // Change negative lanes to 0x7FFF __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1); __ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0)); __ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1)); __ vpackuswb(dst, dst, kScratchDoubleReg); break; } case kSSEI8x16AddSaturateU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ paddusb(i.OutputSimd128Register(), i.InputOperand(1)); 
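// paddusb is an unsigned saturating add: lanes clamp at 0xFF instead of
// wrapping.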
break; } case kAVXI8x16AddSaturateU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16SubSaturateU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ psubusb(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16SubSaturateU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kIA32I8x16ShrU: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); int8_t shift = i.InputInt8(1) & 0x7; // Unpack the bytes into words, do logical shifts, and repack. __ Punpckhbw(kScratchDoubleReg, src); __ Punpcklbw(dst, src); __ Psrlw(kScratchDoubleReg, 8 + shift); __ Psrlw(dst, 8 + shift); __ Packuswb(dst, kScratchDoubleReg); break; } case kSSEI8x16MinU: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); __ pminub(dst, i.InputOperand(1)); break; } case kAVXI8x16MinU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16MaxU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXI8x16MaxU: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSEI8x16GtU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pmaxub(dst, src); __ pcmpeqb(dst, src); __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); __ pxor(dst, kScratchDoubleReg); break; } case kAVXI8x16GtU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpmaxub(kScratchDoubleReg, src1, src2); __ vpcmpeqb(dst, kScratchDoubleReg, src2); __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(dst, dst, kScratchDoubleReg); break; } case kSSEI8x16GeU: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(1); __ pminub(dst, src); __ pcmpeqb(dst, src); break; } case kAVXI8x16GeU: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister src1 = i.InputSimd128Register(0); Operand src2 = i.InputOperand(1); __ vpminub(kScratchDoubleReg, src1, src2); __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2); break; } case kIA32S128Zero: { XMMRegister dst = i.OutputSimd128Register(); __ Pxor(dst, dst); break; } case kSSES128Not: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); if (src.is_reg(dst)) { __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ pxor(dst, kScratchDoubleReg); } else { __ pcmpeqd(dst, dst); __ pxor(dst, src); } break; } case kAVXS128Not: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0)); break; } case kSSES128And: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pand(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXS128And: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case 
kSSES128Or: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ por(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXS128Or: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSES128Xor: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ pxor(i.OutputSimd128Register(), i.InputOperand(1)); break; } case kAVXS128Xor: { CpuFeatureScope avx_scope(tasm(), AVX); __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputOperand(1)); break; } case kSSES128Select: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); // Mask used here is stored in dst. XMMRegister dst = i.OutputSimd128Register(); __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); __ xorps(kScratchDoubleReg, i.InputSimd128Register(2)); __ andps(dst, kScratchDoubleReg); __ xorps(dst, i.InputSimd128Register(2)); break; } case kAVXS128Select: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2), i.InputOperand(1)); __ vandps(dst, kScratchDoubleReg, i.InputOperand(0)); __ vxorps(dst, dst, i.InputSimd128Register(2)); break; } case kIA32S8x16Shuffle: { XMMRegister dst = i.OutputSimd128Register(); Operand src0 = i.InputOperand(0); Register tmp = i.TempRegister(0); // Prepare 16 byte aligned buffer for shuffle control mask __ mov(tmp, esp); __ and_(esp, -16); if (instr->InputCount() == 5) { // only one input operand DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); for (int j = 4; j > 0; j--) { uint32_t mask = i.InputUint32(j); __ push(Immediate(mask)); } __ Pshufb(dst, Operand(esp, 0)); } else { // two input operands DCHECK_EQ(6, instr->InputCount()); __ movups(kScratchDoubleReg, src0); for (int j = 5; j > 1; j--) { uint32_t lanes = i.InputUint32(j); uint32_t mask = 0; for (int k = 0; k < 32; k += 8) { uint8_t lane = lanes >> k; mask |= (lane < kSimd128Size ? lane : 0x80) << k; } __ push(Immediate(mask)); } __ Pshufb(kScratchDoubleReg, Operand(esp, 0)); Operand src1 = i.InputOperand(1); if (!src1.is_reg(dst)) __ movups(dst, src1); for (int j = 5; j > 1; j--) { uint32_t lanes = i.InputUint32(j); uint32_t mask = 0; for (int k = 0; k < 32; k += 8) { uint8_t lane = lanes >> k; mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k; } __ push(Immediate(mask)); } __ Pshufb(dst, Operand(esp, 0)); __ por(dst, kScratchDoubleReg); } __ mov(esp, tmp); break; } case kIA32S32x4Swizzle: { DCHECK_EQ(2, instr->InputCount()); __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1)); break; } case kIA32S32x4Shuffle: { DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above. int8_t shuffle = i.InputInt8(2); DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below. 
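// Strategy: apply the same shuffle to both inputs, then merge the two
// results with a word-wise blend whose mask selects, per 16-bit slot, which
// shuffled input supplies that lane.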
__ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle); __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle); __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3)); break; } case kIA32S16x8Blend: ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2)); break; case kIA32S16x8HalfShuffle1: { XMMRegister dst = i.OutputSimd128Register(); __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1)); __ Pshufhw(dst, dst, i.InputInt8(2)); break; } case kIA32S16x8HalfShuffle2: { XMMRegister dst = i.OutputSimd128Register(); __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2)); __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3)); __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2)); __ Pshufhw(dst, dst, i.InputInt8(3)); __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4)); break; } case kIA32S8x16Alignr: ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2)); break; case kIA32S16x8Dup: { XMMRegister dst = i.OutputSimd128Register(); Operand src = i.InputOperand(0); int8_t lane = i.InputInt8(1) & 0x7; int8_t lane4 = lane & 0x3; int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6); if (lane < 4) { __ Pshuflw(dst, src, half_dup); __ Pshufd(dst, dst, 0); } else { __ Pshufhw(dst, src, half_dup); __ Pshufd(dst, dst, 0xaa); } break; } case kIA32S8x16Dup: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); int8_t lane = i.InputInt8(1) & 0xf; if (CpuFeatures::IsSupported(AVX)) { CpuFeatureScope avx_scope(tasm(), AVX); if (lane < 8) { __ vpunpcklbw(dst, src, src); } else { __ vpunpckhbw(dst, src, src); } } else { DCHECK_EQ(dst, src); if (lane < 8) { __ punpcklbw(dst, dst); } else { __ punpckhbw(dst, dst); } } lane &= 0x7; int8_t lane4 = lane & 0x3; int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6); if (lane < 4) { __ Pshuflw(dst, dst, half_dup); __ Pshufd(dst, dst, 0); } else { __ Pshufhw(dst, dst, half_dup); __ Pshufd(dst, dst, 0xaa); } break; } case kIA32S64x2UnpackHigh: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq); break; case kIA32S32x4UnpackHigh: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq); break; case kIA32S16x8UnpackHigh: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd); break; case kIA32S8x16UnpackHigh: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw); break; case kIA32S64x2UnpackLow: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq); break; case kIA32S32x4UnpackLow: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq); break; case kIA32S16x8UnpackLow: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd); break; case kIA32S8x16UnpackLow: ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw); break; case kSSES16x8UnzipHigh: { CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; DCHECK_EQ(dst, i.InputSimd128Register(0)); if (instr->InputCount() == 2) { __ movups(kScratchDoubleReg, i.InputOperand(1)); __ psrld(kScratchDoubleReg, 16); src2 = kScratchDoubleReg; } __ psrld(dst, 16); __ packusdw(dst, src2); break; } case kAVXS16x8UnzipHigh: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; if (instr->InputCount() == 2) { __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16); src2 = kScratchDoubleReg; } __ vpsrld(dst, i.InputSimd128Register(0), 16); __ vpackusdw(dst, dst, src2); break; } case kSSES16x8UnzipLow: { CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; DCHECK_EQ(dst, i.InputSimd128Register(0)); __ pxor(kScratchDoubleReg, kScratchDoubleReg); if (instr->InputCount() == 
2) { __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55); src2 = kScratchDoubleReg; } __ pblendw(dst, kScratchDoubleReg, 0xaa); __ packusdw(dst, src2); break; } case kAVXS16x8UnzipLow: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); if (instr->InputCount() == 2) { __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1), 0x55); src2 = kScratchDoubleReg; } __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55); __ vpackusdw(dst, dst, src2); break; } case kSSES8x16UnzipHigh: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; DCHECK_EQ(dst, i.InputSimd128Register(0)); if (instr->InputCount() == 2) { __ movups(kScratchDoubleReg, i.InputOperand(1)); __ psrlw(kScratchDoubleReg, 8); src2 = kScratchDoubleReg; } __ psrlw(dst, 8); __ packuswb(dst, src2); break; } case kAVXS8x16UnzipHigh: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; if (instr->InputCount() == 2) { __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8); src2 = kScratchDoubleReg; } __ vpsrlw(dst, i.InputSimd128Register(0), 8); __ vpackuswb(dst, dst, src2); break; } case kSSES8x16UnzipLow: { XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; DCHECK_EQ(dst, i.InputSimd128Register(0)); if (instr->InputCount() == 2) { __ movups(kScratchDoubleReg, i.InputOperand(1)); __ psllw(kScratchDoubleReg, 8); __ psrlw(kScratchDoubleReg, 8); src2 = kScratchDoubleReg; } __ psllw(dst, 8); __ psrlw(dst, 8); __ packuswb(dst, src2); break; } case kAVXS8x16UnzipLow: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src2 = dst; if (instr->InputCount() == 2) { __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8); __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8); src2 = kScratchDoubleReg; } __ vpsllw(dst, i.InputSimd128Register(0), 8); __ vpsrlw(dst, dst, 8); __ vpackuswb(dst, dst, src2); break; } case kSSES8x16TransposeLow: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); __ psllw(dst, 8); if (instr->InputCount() == 1) { __ movups(kScratchDoubleReg, dst); } else { DCHECK_EQ(2, instr->InputCount()); __ movups(kScratchDoubleReg, i.InputOperand(1)); __ psllw(kScratchDoubleReg, 8); } __ psrlw(dst, 8); __ por(dst, kScratchDoubleReg); break; } case kAVXS8x16TransposeLow: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); if (instr->InputCount() == 1) { __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8); __ vpsrlw(dst, kScratchDoubleReg, 8); } else { DCHECK_EQ(2, instr->InputCount()); __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8); __ vpsllw(dst, i.InputSimd128Register(0), 8); __ vpsrlw(dst, dst, 8); } __ vpor(dst, dst, kScratchDoubleReg); break; } case kSSES8x16TransposeHigh: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); __ psrlw(dst, 8); if (instr->InputCount() == 1) { __ movups(kScratchDoubleReg, dst); } else { DCHECK_EQ(2, instr->InputCount()); __ movups(kScratchDoubleReg, i.InputOperand(1)); __ psrlw(kScratchDoubleReg, 8); } __ psllw(kScratchDoubleReg, 8); __ por(dst, kScratchDoubleReg); break; } case kAVXS8x16TransposeHigh: { CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); if (instr->InputCount() == 1) { __ vpsrlw(dst, i.InputSimd128Register(0), 8); __ 
vpsllw(kScratchDoubleReg, dst, 8); } else { DCHECK_EQ(2, instr->InputCount()); __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8); __ vpsrlw(dst, i.InputSimd128Register(0), 8); __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8); } __ vpor(dst, dst, kScratchDoubleReg); break; } case kSSES8x8Reverse: case kSSES8x4Reverse: case kSSES8x2Reverse: { DCHECK_EQ(1, instr->InputCount()); XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); if (arch_opcode != kSSES8x2Reverse) { // First shuffle words into position. int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B; __ pshuflw(dst, dst, shuffle_mask); __ pshufhw(dst, dst, shuffle_mask); } __ movaps(kScratchDoubleReg, dst); __ psrlw(kScratchDoubleReg, 8); __ psllw(dst, 8); __ por(dst, kScratchDoubleReg); break; } case kAVXS8x2Reverse: case kAVXS8x4Reverse: case kAVXS8x8Reverse: { DCHECK_EQ(1, instr->InputCount()); CpuFeatureScope avx_scope(tasm(), AVX); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = dst; if (arch_opcode != kAVXS8x2Reverse) { // First shuffle words into position. int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B; __ vpshuflw(dst, i.InputOperand(0), shuffle_mask); __ vpshufhw(dst, dst, shuffle_mask); } else { src = i.InputSimd128Register(0); } // Reverse each 16 bit lane. __ vpsrlw(kScratchDoubleReg, src, 8); __ vpsllw(dst, src, 8); __ vpor(dst, dst, kScratchDoubleReg); break; } case kIA32S1x4AnyTrue: case kIA32S1x8AnyTrue: case kIA32S1x16AnyTrue: { Register dst = i.OutputRegister(); XMMRegister src = i.InputSimd128Register(0); Register tmp = i.TempRegister(0); __ xor_(tmp, tmp); __ mov(dst, Immediate(-1)); __ Ptest(src, src); __ cmov(zero, dst, tmp); break; } case kIA32S1x4AllTrue: case kIA32S1x8AllTrue: case kIA32S1x16AllTrue: { Register dst = i.OutputRegister(); Operand src = i.InputOperand(0); Register tmp = i.TempRegister(0); __ mov(tmp, Immediate(-1)); __ xor_(dst, dst); // Compare all src lanes to false. __ Pxor(kScratchDoubleReg, kScratchDoubleReg); if (arch_opcode == kIA32S1x4AllTrue) { __ Pcmpeqd(kScratchDoubleReg, src); } else if (arch_opcode == kIA32S1x8AllTrue) { __ Pcmpeqw(kScratchDoubleReg, src); } else { __ Pcmpeqb(kScratchDoubleReg, src); } // If kScratchDoubleReg is all zero, none of src lanes are false. 
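// ptest sets ZF iff the AND of its operands is zero, so the cmov copies the
// preloaded all-true value from tmp into dst exactly in that case.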
__ Ptest(kScratchDoubleReg, kScratchDoubleReg); __ cmov(zero, dst, tmp); break; } case kIA32StackCheck: { ExternalReference const stack_limit = ExternalReference::address_of_stack_limit(__ isolate()); __ VerifyRootRegister(); __ cmp(esp, tasm()->StaticVariable(stack_limit)); break; } case kIA32Word32AtomicPairLoad: { XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); __ movq(tmp, i.MemoryOperand()); __ Pextrd(i.OutputRegister(0), tmp, 0); __ Pextrd(i.OutputRegister(1), tmp, 1); break; } case kIA32Word32AtomicPairStore: { __ mov(i.TempRegister(0), i.MemoryOperand(2)); __ mov(i.TempRegister(1), i.NextMemoryOperand(2)); __ lock(); __ cmpxchg8b(i.MemoryOperand(2)); break; } case kWord32AtomicExchangeInt8: { __ xchg_b(i.InputRegister(0), i.MemoryOperand(1)); __ movsx_b(i.InputRegister(0), i.InputRegister(0)); break; } case kWord32AtomicExchangeUint8: { __ xchg_b(i.InputRegister(0), i.MemoryOperand(1)); __ movzx_b(i.InputRegister(0), i.InputRegister(0)); break; } case kWord32AtomicExchangeInt16: { __ xchg_w(i.InputRegister(0), i.MemoryOperand(1)); __ movsx_w(i.InputRegister(0), i.InputRegister(0)); break; } case kWord32AtomicExchangeUint16: { __ xchg_w(i.InputRegister(0), i.MemoryOperand(1)); __ movzx_w(i.InputRegister(0), i.InputRegister(0)); break; } case kWord32AtomicExchangeWord32: { __ xchg(i.InputRegister(0), i.MemoryOperand(1)); break; } // For the narrow Word64 operations below, i.OutputRegister(1) contains // the high-order 32 bits for the 64bit operation. As the data exchange // fits in one register, the i.OutputRegister(1) needs to be cleared for // the correct return value to be propagated back. case kIA32Word64AtomicNarrowExchangeUint8: { __ xchg_b(i.OutputRegister(0), i.MemoryOperand(1)); __ movzx_b(i.OutputRegister(0), i.OutputRegister(0)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case kIA32Word64AtomicNarrowExchangeUint16: { __ xchg_w(i.OutputRegister(0), i.MemoryOperand(1)); __ movzx_w(i.OutputRegister(0), i.OutputRegister(0)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case kIA32Word64AtomicNarrowExchangeUint32: { __ xchg(i.OutputRegister(0), i.MemoryOperand(1)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case kIA32Word32AtomicPairExchange: { __ mov(i.OutputRegister(0), i.MemoryOperand(2)); __ mov(i.OutputRegister(1), i.NextMemoryOperand(2)); __ lock(); __ cmpxchg8b(i.MemoryOperand(2)); break; } case kWord32AtomicCompareExchangeInt8: { __ lock(); __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1)); __ movsx_b(eax, eax); break; } case kWord32AtomicCompareExchangeUint8: { __ lock(); __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1)); __ movzx_b(eax, eax); break; } case kWord32AtomicCompareExchangeInt16: { __ lock(); __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1)); __ movsx_w(eax, eax); break; } case kWord32AtomicCompareExchangeUint16: { __ lock(); __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1)); __ movzx_w(eax, eax); break; } case kWord32AtomicCompareExchangeWord32: { __ lock(); __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1)); break; } case kIA32Word64AtomicNarrowCompareExchangeUint8: { __ lock(); __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1)); __ movzx_b(i.OutputRegister(0), i.OutputRegister(0)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case kIA32Word64AtomicNarrowCompareExchangeUint16: { __ lock(); __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1)); __ movzx_w(i.OutputRegister(0), i.OutputRegister(0)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case 
kIA32Word64AtomicNarrowCompareExchangeUint32: { __ lock(); __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1)); __ xor_(i.OutputRegister(1), i.OutputRegister(1)); break; } case kIA32Word32AtomicPairCompareExchange: { __ lock(); __ cmpxchg8b(i.MemoryOperand(4)); break; } #define ATOMIC_BINOP_CASE(op, inst) \ case kWord32Atomic##op##Int8: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \ __ movsx_b(eax, eax); \ break; \ } \ case kIA32Word64AtomicNarrow##op##Uint8: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \ __ movzx_b(i.OutputRegister(0), i.OutputRegister(0)); \ __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \ break; \ } \ case kWord32Atomic##op##Uint8: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \ __ movzx_b(eax, eax); \ break; \ } \ case kWord32Atomic##op##Int16: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \ __ movsx_w(eax, eax); \ break; \ } \ case kIA32Word64AtomicNarrow##op##Uint16: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \ __ movzx_w(i.OutputRegister(0), i.OutputRegister(0)); \ __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \ break; \ } \ case kWord32Atomic##op##Uint16: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \ __ movzx_w(eax, eax); \ break; \ } \ case kIA32Word64AtomicNarrow##op##Uint32: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \ __ xor_(i.OutputRegister(1), i.OutputRegister(1)); \ break; \ } \ case kWord32Atomic##op##Word32: { \ ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \ break; \ } ATOMIC_BINOP_CASE(Add, add) ATOMIC_BINOP_CASE(Sub, sub) ATOMIC_BINOP_CASE(And, and_) ATOMIC_BINOP_CASE(Or, or_) ATOMIC_BINOP_CASE(Xor, xor_) #undef ATOMIC_BINOP_CASE #define ATOMIC_BINOP_CASE(op, instr1, instr2) \ case kIA32Word32AtomicPair##op: { \ ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \ break; \ } ATOMIC_BINOP_CASE(Add, add, adc) ATOMIC_BINOP_CASE(And, and_, and_) ATOMIC_BINOP_CASE(Or, or_, or_) ATOMIC_BINOP_CASE(Xor, xor_, xor_) #undef ATOMIC_BINOP_CASE case kIA32Word32AtomicPairSub: { Label binop; __ bind(&binop); // Move memory operand into edx:eax __ mov(i.OutputRegister(0), i.MemoryOperand(2)); __ mov(i.OutputRegister(1), i.NextMemoryOperand(2)); // Save input registers temporarily on the stack. __ push(i.InputRegister(0)); __ push(i.InputRegister(1)); // Negate input in place __ neg(i.InputRegister(0)); __ adc(i.InputRegister(1), 0); __ neg(i.InputRegister(1)); // Add memory operand, negated input. __ add(i.InputRegister(0), i.OutputRegister(0)); __ adc(i.InputRegister(1), i.OutputRegister(1)); __ lock(); __ cmpxchg8b(i.MemoryOperand(2)); // Restore input registers __ pop(i.InputRegister(1)); __ pop(i.InputRegister(0)); __ j(not_equal, &binop); break; } case kWord32AtomicLoadInt8: case kWord32AtomicLoadUint8: case kWord32AtomicLoadInt16: case kWord32AtomicLoadUint16: case kWord32AtomicLoadWord32: case kWord32AtomicStoreWord8: case kWord32AtomicStoreWord16: case kWord32AtomicStoreWord32: UNREACHABLE(); // Won't be generated by instruction selector. 
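// The instruction selector lowers atomic loads to ordinary IA32 loads
// (aligned 32-bit loads are already atomic) and atomic stores to the
// exchange sequences above, so these opcodes never reach the code generator.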
break; } return kSuccess; } // NOLINT(readability/fn_size) static Condition FlagsConditionToCondition(FlagsCondition condition) { switch (condition) { case kUnorderedEqual: case kEqual: return equal; break; case kUnorderedNotEqual: case kNotEqual: return not_equal; break; case kSignedLessThan: return less; break; case kSignedGreaterThanOrEqual: return greater_equal; break; case kSignedLessThanOrEqual: return less_equal; break; case kSignedGreaterThan: return greater; break; case kUnsignedLessThan: return below; break; case kUnsignedGreaterThanOrEqual: return above_equal; break; case kUnsignedLessThanOrEqual: return below_equal; break; case kUnsignedGreaterThan: return above; break; case kOverflow: return overflow; break; case kNotOverflow: return no_overflow; break; default: UNREACHABLE(); break; } } // Assembles a branch after an instruction. void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) { Label::Distance flabel_distance = branch->fallthru ? Label::kNear : Label::kFar; Label* tlabel = branch->true_label; Label* flabel = branch->false_label; if (branch->condition == kUnorderedEqual) { __ j(parity_even, flabel, flabel_distance); } else if (branch->condition == kUnorderedNotEqual) { __ j(parity_even, tlabel); } __ j(FlagsConditionToCondition(branch->condition), tlabel); // Add a jump if not falling through to the next block. if (!branch->fallthru) __ jmp(flabel); } void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition, Instruction* instr) { // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal). if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) { return; } condition = NegateFlagsCondition(condition); __ setcc(FlagsConditionToCondition(condition), kSpeculationPoisonRegister); __ add(kSpeculationPoisonRegister, Immediate(255)); __ sar(kSpeculationPoisonRegister, 31u); } void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr, BranchInfo* branch) { AssembleArchBranch(instr, branch); } void CodeGenerator::AssembleArchJump(RpoNumber target) { if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target)); } void CodeGenerator::AssembleArchTrap(Instruction* instr, FlagsCondition condition) { class OutOfLineTrap final : public OutOfLineCode { public: OutOfLineTrap(CodeGenerator* gen, Instruction* instr) : OutOfLineCode(gen), instr_(instr), gen_(gen) {} void Generate() final { IA32OperandConverter i(gen_, instr_); TrapId trap_id = static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1)); GenerateCallToTrap(trap_id); } private: void GenerateCallToTrap(TrapId trap_id) { if (trap_id == TrapId::kInvalid) { // We cannot test calls to the runtime in cctest/test-run-wasm. // Therefore we emit a call to C here instead of a call to the runtime. __ PrepareCallCFunction(0, esi); __ CallCFunction( ExternalReference::wasm_call_trap_callback_for_testing(), 0); __ LeaveFrame(StackFrame::WASM_COMPILED); auto call_descriptor = gen_->linkage()->GetIncomingDescriptor(); size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize; // Use ecx as a scratch register; we return immediately anyway. __ Ret(static_cast<int>(pop_size), ecx); } else { gen_->AssembleSourcePosition(instr_); // A direct call to a wasm runtime stub defined in this module. // Just encode the stub index. This will be patched at relocation. __ wasm_call(static_cast<Address>
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}

void CodeGenerator::AssembleArchJump(RpoNumber target) {
  if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
}

void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      IA32OperandConverter i(gen_, instr_);
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        __ PrepareCallCFunction(0, esi);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM_COMPILED);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        size_t pop_size =
            call_descriptor->StackParameterCount() * kPointerSize;
        // Use ecx as a scratch register; we return immediately anyway.
        __ Ret(static_cast<int>(pop_size), ecx);
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched at relocation.
        __ wasm_call(static_cast<Address>(trap_id),
                     RelocInfo::WASM_STUB_CALL);
        ReferenceMap* reference_map =
            new (gen_->zone()) ReferenceMap(gen_->zone());
        gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                              Safepoint::kNoLazyDeopt);
        __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = new (zone()) OutOfLineTrap(this, instr);
  Label* tlabel = ool->entry();
  Label end;
  if (condition == kUnorderedEqual) {
    __ j(parity_even, &end);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(condition), tlabel);
  __ bind(&end);
}

// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  IA32OperandConverter i(this, instr);
  Label done;

  // Materialize a full 32-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  Label check;
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  if (condition == kUnorderedEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    __ j(parity_odd, &check, Label::kNear);
    __ mov(reg, Immediate(1));
    __ jmp(&done, Label::kNear);
  }
  Condition cc = FlagsConditionToCondition(condition);

  __ bind(&check);
  if (reg.is_byte_register()) {
    // setcc for byte registers (al, bl, cl, dl).
    __ setcc(cc, reg);
    __ movzx_b(reg, reg);
  } else {
    // Emit a branch to set a register to either 1 or 0.
    Label set;
    __ j(cc, &set, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
    __ bind(&set);
    __ mov(reg, Immediate(1));
  }
  __ bind(&done);
}

void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  std::vector<std::pair<int32_t, Label*>> cases;
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
  }
  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
                                      cases.data() + cases.size());
}

void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    __ cmp(input, Immediate(i.InputInt32(index + 0)));
    __ j(equal, GetLabel(i.InputRpo(index + 1)));
  }
  AssembleArchJump(i.InputRpo(1));
}

void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  size_t const case_count = instr->InputCount() - 2;
  Label** cases = zone()->NewArray<Label*>(case_count);
  for (size_t index = 0; index < case_count; ++index) {
    cases[index] = GetLabel(i.InputRpo(index + 2));
  }
  Label* const table = AddJumpTable(cases, case_count);
  __ cmp(input, Immediate(case_count));
  __ j(above_equal, GetLabel(i.InputRpo(1)));
  __ jmp(Operand::JumpTable(input, times_4, table));
}
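// For reference, a sketch of the dispatch sequence AssembleArchTableSwitch
// emits for a 3-case switch (register names are illustrative):
//   cmp input, 3
//   jae default_label          ; unsigned compare also catches negative input
//   jmp [table + input * 4]    ; indirect jump through the jump table
// The table itself is emitted by AssembleJumpTable at the end of this file as
// a sequence of 32-bit label addresses (dd directives).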
// The calling convention for JSFunctions on IA32 passes arguments on the
// stack and the JSFunction and context in EDI and ESI, respectively; thus
// the steps of the call look as follows:

// --{ before the call instruction }--------------------------------------------
//                                                  |  caller frame  |
//                                                  ^ esp            ^ ebp

// --{ push arguments and setup ESI, EDI }--------------------------------------
//                                | args + receiver |  caller frame  |
//                                ^ esp                              ^ ebp
//                 [edi = JSFunction, esi = context]

// --{ call [edi + kCodeEntryOffset] }------------------------------------------
//                          | RET | args + receiver |  caller frame  |
//                          ^ esp                                    ^ ebp

// =={ prologue of called function }============================================
// --{ push ebp }---------------------------------------------------------------
//                     | FP | RET | args + receiver |  caller frame  |
//                     ^ esp                                         ^ ebp

// --{ mov ebp, esp }-----------------------------------------------------------
//                     | FP | RET | args + receiver |  caller frame  |
//                     ^ ebp,esp

// --{ push esi }---------------------------------------------------------------
//               | CTX | FP | RET | args + receiver |  caller frame  |
//               ^ esp  ^ ebp

// --{ push edi }---------------------------------------------------------------
//         | FNC | CTX | FP | RET | args + receiver |  caller frame  |
//         ^ esp        ^ ebp

// --{ subi esp, #N }-----------------------------------------------------------
// | callee frame | FNC | CTX | FP | RET | args + receiver |  caller frame  |
// ^ esp                       ^ ebp

// =={ body of called function }================================================

// =={ epilogue of called function }============================================
// --{ mov esp, ebp }-----------------------------------------------------------
//                     | FP | RET | args + receiver |  caller frame  |
//                     ^ esp,ebp

// --{ pop ebp }-----------------------------------------------------------------
//                          | RET | args + receiver |  caller frame  |
//                          ^ esp                                    ^ ebp

// --{ ret #A+1 }-----------------------------------------------------------------
//                                                  |  caller frame  |
//                                                  ^ esp            ^ ebp

// Runtime function calls are accomplished by doing a stub call to the
// CEntry (a real code object). On IA32, arguments are passed on the
// stack, the number of arguments in EAX, the address of the runtime function
// in EBX, and the context in ESI.

// --{ before the call instruction }--------------------------------------------
//                                                  |  caller frame  |
//                                                  ^ esp            ^ ebp

// --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
//                                | args + receiver |  caller frame  |
//                                ^ esp                              ^ ebp
//          [eax = #args, ebx = runtime function, esi = context]

// --{ call #CEntry }------------------------------------------------------------
//                          | RET | args + receiver |  caller frame  |
//                          ^ esp                                    ^ ebp

// =={ body of runtime function }===============================================

// --{ runtime returns }---------------------------------------------------------
//                                                  |  caller frame  |
//                                                  ^ esp            ^ ebp

// Other custom linkages (e.g. for calling directly into and out of C++) may
// need to save callee-saved registers on the stack, which is done in the
// function prologue of generated code.

// --{ before the call instruction }--------------------------------------------
//                                                  |  caller frame  |
//                                                  ^ esp            ^ ebp

// --{ set up arguments in registers on stack }----------------------------------
//                                           | args |  caller frame  |
//                                           ^ esp                   ^ ebp
//                 [r0 = arg0, r1 = arg1, ...]
// --{ call code }----------------------------------------------------------------
//                                     | RET | args |  caller frame  |
//                                     ^ esp                         ^ ebp

// =={ prologue of called function }============================================
// --{ push ebp }------------------------------------------------------------------
//                                | FP | RET | args |  caller frame  |
//                                ^ esp                              ^ ebp

// --{ mov ebp, esp }----------------------------------------------------------------
//                                | FP | RET | args |  caller frame  |
//                                ^ ebp,esp

// --{ save registers }--------------------------------------------------------------
//                         | regs | FP | RET | args |  caller frame  |
//                         ^ esp   ^ ebp

// --{ subi esp, #N }----------------------------------------------------------------
//          | callee frame | regs | FP | RET | args |  caller frame  |
//          ^ esp                  ^ ebp

// =={ body of called function }====================================================

// =={ epilogue of called function }================================================
// --{ restore registers }-----------------------------------------------------------
//                         | regs | FP | RET | args |  caller frame  |
//                         ^ esp   ^ ebp

// --{ mov esp, ebp }-----------------------------------------------------------------
//                                | FP | RET | args |  caller frame  |
//                                ^ esp,ebp

// --{ pop ebp }-----------------------------------------------------------------------
//                                     | RET | args |  caller frame  |
//                                     ^ esp                         ^ ebp

void CodeGenerator::FinishFrame(Frame* frame) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    int pushed = 0;
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (!((1 << i) & saves)) continue;
      ++pushed;
    }
    frame->AllocateSavedCalleeRegisterSlots(pushed);
  }
}
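// Note: the loop in FinishFrame above simply computes the population count of
// the callee-saved RegList; AssembleConstructFrame below obtains the same
// quantity via base::bits::CountPopulation when carving up shrink_slots.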
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
      __ push(ebp);
      __ mov(ebp, esp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ push(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ push(kWasmInstanceRegister);
      }
    }
  }

  int shrink_slots = frame()->GetTotalFrameSlotCount() -
                     call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the
    // unoptimized frame is still on the stack. Optimized code uses OSR values
    // directly from the unoptimized frame. Thus, all that needs to be done is
    // to allocate the remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
    ResetSpeculationPoison();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (shrink_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && shrink_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
        Register scratch = esi;
        __ push(scratch);
        __ mov(scratch,
               FieldOperand(kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ mov(scratch, Operand(scratch, 0));
        __ add(scratch, Immediate(shrink_slots * kPointerSize));
        __ cmp(esp, scratch);
        __ pop(scratch);
        __ j(above_equal, &done);
      }
      __ mov(ecx, FieldOperand(kWasmInstanceRegister,
                               WasmInstanceObject::kCEntryStubOffset));
      __ Move(esi, Smi::kZero);
      __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                      Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are created below.
    shrink_slots -= base::bits::CountPopulation(saves);
    shrink_slots -= frame()->GetReturnSlotCount();
    if (shrink_slots > 0) {
      __ sub(esp, Immediate(shrink_slots * kPointerSize));
    }
  }

  if (saves != 0) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (((1 << i) & saves)) __ push(Register::from_code(i));
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
  }
}

void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  // Restore registers.
  if (saves != 0) {
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ add(esp, Immediate(returns * kPointerSize));
    }
    for (int i = 0; i < Register::kNumRegisters; i++) {
      if (!((1 << i) & saves)) continue;
      __ pop(Register::from_code(i));
    }
  }

  // Might need ecx for scratch if pop_size is too big or if there is a
  // variable pop count.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
  size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
  IA32OperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now if they always have the
    // same number of return args.
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
  if (pop->IsImmediate()) {
    DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
    pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
    __ Ret(static_cast<int>(pop_size), ecx);
  } else {
    Register pop_reg = g.ToRegister(pop);
    Register scratch_reg = pop_reg == ecx ? edx : ecx;
    __ pop(scratch_reg);
    __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
    __ jmp(scratch_reg);
  }
}

void CodeGenerator::FinishCode() {}
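// For reference, the variable-pop return path in AssembleReturn above emits
// roughly the following (a sketch; pop_reg holds the dynamic argument count
// and scratch_reg is whichever of ecx/edx is free):
//   pop scratch_reg                          ; grab the return address
//   lea esp, [esp + pop_reg * 4 + pop_size]  ; drop the stack arguments
//   jmp scratch_reg                          ; return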
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ mov(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ mov(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        __ push(src);
        __ pop(dst);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, src);
          __ movss(dst, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, src);
          __ movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, src);
          __ movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        Register dst = g.ToRegister(destination);
        if (src.type() == Constant::kHeapObject) {
          __ Move(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, src.ToFloat32AsInt());
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        if (src.type() == Constant::kHeapObject) {
          __ mov(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ Move(dst, Immediate(src.ToFloat32AsInt()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          uint64_t constant_value = src.ToFloat64().AsUint64();
          uint32_t lower = static_cast<uint32_t>(constant_value);
          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
          Operand dst0 = dst;
          Operand dst1 = g.ToOperand(destination, kPointerSize);
          __ Move(dst0, Immediate(lower));
          __ Move(dst1, Immediate(upper));
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
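// Design note on the moves above: IA-32 has no general memory-to-memory mov,
// so stack-to-stack GP moves go through push/pop (which needs no scratch
// register), while FP stack-to-stack moves bounce through kScratchDoubleReg,
// the xmm scratch register reserved at the top of this file so that moves and
// swaps never have to spill to find a temporary.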
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ push(src);
        __ mov(src, dst);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ movaps(kScratchDoubleReg, src);
        __ movaps(src, dst);
        __ movaps(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        __ push(src);
        frame_access_state()->IncreaseSPDelta(1);
        Operand dst = g.ToOperand(destination);
        __ mov(src, dst);
        frame_access_state()->IncreaseSPDelta(-1);
        dst = g.ToOperand(destination);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst);
          __ movss(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst);
          __ movsd(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, dst);
          __ movups(dst, src);
          __ movups(src, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      if (source->IsStackSlot()) {
        Operand dst1 = g.ToOperand(destination);
        __ push(dst1);
        frame_access_state()->IncreaseSPDelta(1);
        Operand src1 = g.ToOperand(source);
        __ push(src1);
        Operand dst2 = g.ToOperand(destination);
        __ pop(dst2);
        frame_access_state()->IncreaseSPDelta(-1);
        Operand src2 = g.ToOperand(source);
        __ pop(src2);
      } else {
        DCHECK(source->IsFPStackSlot());
        Operand src0 = g.ToOperand(source);
        Operand dst0 = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ movss(src0, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kPointerSize));
          __ pop(g.ToOperand(destination, kPointerSize));
          __ movsd(src0, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kPointerSize));
          __ pop(g.ToOperand(destination, kPointerSize));
          __ push(g.ToOperand(source, 2 * kPointerSize));
          __ pop(g.ToOperand(destination, 2 * kPointerSize));
          __ push(g.ToOperand(source, 3 * kPointerSize));
          __ pop(g.ToOperand(destination, 3 * kPointerSize));
          __ movups(src0, kScratchDoubleReg);
        }
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  for (size_t index = 0; index < target_count; ++index) {
    __ dd(targets[index]);
  }
}

#undef __
#undef kScratchDoubleReg
#undef ASSEMBLE_COMPARE
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP
#undef ASSEMBLE_BINOP
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_I64ATOMIC_BINOP
#undef ASSEMBLE_MOVX
#undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
#undef ASSEMBLE_SIMD_IMM_SHUFFLE

}  // namespace compiler
}  // namespace internal
}  // namespace v8