/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "code_generator_x86_64.h" #include "art_method.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/object_reference.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/managed_register_x86_64.h" namespace art { template class GcRoot; namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = RDI; // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; static constexpr int kC2ConditionMask = 0x400; #define __ down_cast(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value() class NullCheckSlowPathX86_64 : public SlowPathCode { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } bool IsFatal() const OVERRIDE { return true; } const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); }; class DivZeroCheckSlowPathX86_64 : public SlowPathCode { public: explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } bool IsFatal() const OVERRIDE { return true; } const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); }; class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { public: DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div) : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); if (type_ == Primitive::kPrimInt) { if (is_div_) { __ negl(cpu_reg_); } else { __ xorl(cpu_reg_, cpu_reg_); } } else { DCHECK_EQ(Primitive::kPrimLong, type_); if (is_div_) { __ negq(cpu_reg_); } else { __ xorl(cpu_reg_, cpu_reg_); } } __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; } private: const CpuRegister cpu_reg_; const Primitive::Type type_; const bool is_div_; DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64); }; class SuspendCheckSlowPathX86_64 : public SlowPathCode { public: SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { __ jmp(x86_64_codegen->GetLabelOf(successor_)); } } Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } HBasicBlock* GetSuccessor() const { return successor_; } const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } private: HBasicBlock* const successor_; Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64); }; class BoundsCheckSlowPathX86_64 : public SlowPathCode { public: explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } bool IsFatal() const OVERRIDE { return true; } const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); }; class LoadClassSlowPathX86_64 : public SlowPathCode { public: LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = at_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); x86_64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); if (do_clinit_) { CheckEntrypointTypes(); } else { CheckEntrypointTypes(); } Location out = locations->Out(); // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); x86_64_codegen->Move(out, Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; } private: // The class this slow path will load. HLoadClass* const cls_; // The instruction where this slow path is happening. // (Might be the load class or an initialization check). HInstruction* const at_; // The dex PC of `at_`. const uint32_t dex_pc_; // Whether to initialize the class. const bool do_clinit_; DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); }; class LoadStringSlowPathX86_64 : public SlowPathCode { public: explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); }; class TypeCheckSlowPathX86_64 : public SlowPathCode { public: TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) : SlowPathCode(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out(); uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, object_class, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); CheckEntrypointTypes< kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); CheckEntrypointTypes(); } if (!is_fatal_) { if (instruction_->IsInstanceOf()) { x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); } RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; } bool IsFatal() const OVERRIDE { return is_fatal_; } private: const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); }; class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; class ArraySetSlowPathX86_64 : public SlowPathCode { public: explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; HParallelMove parallel_move(codegen->GetGraph()->GetArena()); parallel_move.AddMove( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, nullptr); parallel_move.AddMove( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt, nullptr); parallel_move.AddMove( locations->InAt(2), Location::RegisterLocation(calling_convention.GetRegisterAt(2)), Primitive::kPrimNot, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; // Slow path marking an object during a read barrier. class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj) : SlowPathCode(instruction), out_(out), obj_(obj) { DCHECK(kEmitCompilerReadBarrier); } const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } private: const Location out_; const Location obj_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction, Location out, Location ref, Location obj, uint32_t offset, Location index) : SlowPathCode(instruction), out_(out), ref_(ref), obj_(obj), offset_(offset), index_(index) { DCHECK(kEmitCompilerReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial // object has been overwritten by (or after) the heap object // reference load to be instrumented, e.g.: // // __ movl(out, Address(out, offset)); // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); LocationSummary* locations = instruction_->GetLocations(); CpuRegister reg_out = out_.AsRegister(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); // We may have to change the index's value, but as `index_` is a // constant member (like other "inputs" of this slow path), // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. if (instruction_->IsArrayGet()) { // Compute real offset and store it in index_. Register index_reg = index_.AsRegister().AsRegister(); DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); if (codegen->IsCoreCalleeSaveRegister(index_reg)) { // We are about to change the value of `index_reg` (see the // calls to art::x86_64::X86_64Assembler::shll and // art::x86_64::X86_64Assembler::AddImmediate below), but it // has not been saved by the previous call to // art::SlowPathCode::SaveLiveRegisters, as it is a // callee-save register -- // art::SlowPathCode::SaveLiveRegisters does not consider // callee-save registers, as it has been designed with the // assumption that callee-save registers are supposed to be // handled by the called function. So, as a callee-save // register, `index_reg` _would_ eventually be saved onto // the stack, but it would be too late: we would have // changed its value earlier. Therefore, we manually save // it here into another freely available register, // `free_reg`, chosen of course among the caller-save // registers (as a callee-save `free_reg` register would // exhibit the same problem). // // Note we could have requested a temporary register from // the register allocator instead; but we prefer not to, as // this is a slow path, and we know we can find a // caller-save register that is available. Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister(); __ movl(CpuRegister(free_reg), CpuRegister(index_reg)); index_reg = free_reg; index = Location::RegisterLocation(index_reg); } else { // The initial register stored in `index_` has already been // saved in the call to art::SlowPathCode::SaveLiveRegisters // (as it is not a callee-save register), so we can freely // use it. } // Shifting the index value contained in `index_reg` by the // scale factor (2) cannot overflow in practice, as the // runtime is unable to allocate object arrays with a size // larger than 2^26 - 1 (that is, 2^28 - 4 bytes). __ shll(CpuRegister(index_reg), Immediate(TIMES_4)); static_assert( sizeof(mirror::HeapReference) == sizeof(int32_t), "art::mirror::HeapReference and int32_t have different sizes."); __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); } else { DCHECK(instruction_->IsInvoke()); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); DCHECK(index_.IsRegister()); } } // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver. InvokeRuntimeCallingConvention calling_convention; HParallelMove parallel_move(codegen->GetGraph()->GetArena()); parallel_move.AddMove(ref_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, nullptr); parallel_move.AddMove(obj_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, Location::RegisterLocation(calling_convention.GetRegisterAt(2)), Primitive::kPrimInt, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); } else { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_)); } x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes< kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86_64"; } private: CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { size_t ref = static_cast(ref_.AsRegister().AsRegister()); size_t obj = static_cast(obj_.AsRegister().AsRegister()); for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { return static_cast(i); } } // We shall never fail to find a free caller-save register, as // there are more than two core caller-save registers on x86-64 // (meaning it is possible to find one which is different from // `ref` and `obj`). DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); LOG(FATAL) << "Could not find a free caller-save register"; UNREACHABLE(); } const Location out_; const Location ref_; const Location obj_; const uint32_t offset_; // An additional location containing an index to an array. // Only used for HArrayGet and the UnsafeGetObject & // UnsafeGetObjectVolatile intrinsics. const Location index_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64); }; // Slow path generating a read barrier for a GC root. class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) << "Unexpected instruction in read barrier for GC root slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; CodeGeneratorX86_64* x86_64_codegen = down_cast(codegen); x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes*>(); x86_64_codegen->Move(out_, Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } private: const Location out_; const Location root_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64); }; #undef __ #define __ down_cast(GetAssembler())-> inline Condition X86_64IntegerCondition(IfCondition cond) { switch (cond) { case kCondEQ: return kEqual; case kCondNE: return kNotEqual; case kCondLT: return kLess; case kCondLE: return kLessEqual; case kCondGT: return kGreater; case kCondGE: return kGreaterEqual; case kCondB: return kBelow; case kCondBE: return kBelowEqual; case kCondA: return kAbove; case kCondAE: return kAboveEqual; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); } // Maps FP condition to x86_64 name. inline Condition X86_64FPCondition(IfCondition cond) { switch (cond) { case kCondEQ: return kEqual; case kCondNE: return kNotEqual; case kCondLT: return kBelow; case kCondLE: return kBelowEqual; case kCondGT: return kAbove; case kCondGE: return kAboveEqual; default: break; // should not happen }; LOG(FATAL) << "Unreachable"; UNREACHABLE(); } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { switch (desired_dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // For direct code, we actually prefer to call via the code pointer from ArtMethod*. return HInvokeStaticOrDirect::DispatchInfo { desired_dispatch_info.method_load_kind, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, desired_dispatch_info.method_load_data, 0u }; default: return desired_dispatch_info; } } void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // All registers are assumed to be correctly set up. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ gs()->movq(temp.AsRegister(), Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister(), Immediate(invoke->GetMethodAddress())); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister(), Immediate(0)); // Placeholder. method_patches_.emplace_back(invoke->GetTargetMethod()); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { __ movq(temp.AsRegister(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind a new fixup label at the end of the "movl" insn. uint32_t offset = invoke->GetDexCacheArrayOffset(); __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; CpuRegister reg = temp.AsRegister(); if (current_method.IsRegister()) { method_reg = current_method.AsRegister(); } else { DCHECK(invoke->GetLocations()->Intrinsified()); DCHECK(!current_method.IsValid()); method_reg = reg.AsRegister(); __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ movq(reg, Address(CpuRegister(method_reg), ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); // temp = temp[index_in_cache]; // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. uint32_t index_in_cache = invoke->GetDexMethodIndex(); __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); break; } } switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ call(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { relative_call_patches_.emplace_back(invoke->GetTargetMethod()); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. break; } case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // Filtered out by GetSupportedInvokeStaticOrDirectDispatch(). LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // (callee_method + offset_of_quick_compiled_code)() __ call(Address(callee_method.AsRegister(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); break; } DCHECK(!IsLeafMethod()); } void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { CpuRegister temp = temp_in.AsRegister(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are // guaranteed that the receiver is the first register of the calling convention. InvokeDexCallingConvention calling_convention; Register receiver = calling_convention.GetRegisterAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // /* HeapReference */ temp = receiver->klass_ __ movl(temp, Address(CpuRegister(receiver), class_offset)); MaybeRecordImplicitNullCheck(invoke); // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. // However this is not required in practice, as this is an // intermediate/temporary reference and because the current // concurrent copying collector keeps the from-space memory // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); } void CodeGeneratorX86_64::RecordSimplePatch() { if (GetCompilerOptions().GetIncludePatchInformation()) { simple_patches_.emplace_back(); __ Bind(&simple_patches_.back()); } } void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) { string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add a patch entry and return the label. pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset); return &pc_relative_dex_cache_patches_.back().label; } void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector* linker_patches) { DCHECK(linker_patches->empty()); size_t size = method_patches_.size() + relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + simple_patches_.size() + string_patches_.size(); linker_patches->reserve(size); // The label points to the end of the "movl" insn but the literal offset for method // patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; for (const MethodPatchInfo