/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

static constexpr Register kMethodRegisterArgument = RDI;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);
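// Illustrative note (a sketch, not normative; the actual mapping is produced by
// InvokeDexCallingConventionVisitorX86_64::GetNextLocation below): for a static method
// `int foo(int i, long j, float f)`, the managed calling convention defined by the tables
// above passes the ArtMethod* in RDI (kMethodRegisterArgument), `i` in RSI, `j` in RDX and
// `f` in XMM0; arguments that do not fit in the register lists are passed on the stack.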
// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated.  It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(MathSignumFloat) \
  V(MathSignumDouble) \
  V(MathCopySignFloat) \
  V(MathCopySignDouble) \
  V(CRC32Update) \
  V(CRC32UpdateBytes) \
  V(CRC32UpdateByteBuffer) \
  V(FP16ToFloat) \
  V(FP16ToHalf) \
  V(FP16Floor) \
  V(FP16Ceil) \
  V(FP16Rint) \
  V(FP16Greater) \
  V(FP16GreaterEquals) \
  V(FP16Less) \
  V(FP16LessEquals) \
  V(FP16Compare) \
  V(FP16Min) \
  V(FP16Max) \
  V(IntegerRemainderUnsigned) \
  V(LongRemainderUnsigned) \
  V(StringStringIndexOf) \
  V(StringStringIndexOfAfter) \
  V(StringBufferAppend) \
  V(StringBufferLength) \
  V(StringBufferToString) \
  V(StringBuilderAppendObject) \
  V(StringBuilderAppendString) \
  V(StringBuilderAppendCharSequence) \
  V(StringBuilderAppendCharArray) \
  V(StringBuilderAppendBoolean) \
  V(StringBuilderAppendChar) \
  V(StringBuilderAppendInt) \
  V(StringBuilderAppendLong) \
  V(StringBuilderAppendFloat) \
  V(StringBuilderAppendDouble) \
  V(StringBuilderLength) \
  V(StringBuilderToString) \
  V(UnsafeArrayBaseOffset) \
  /* 1.8 */ \
  V(JdkUnsafeArrayBaseOffset) \
  V(MethodHandleInvoke) \


class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};
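// A minimal usage sketch (assumed, not part of this header): slow paths typically stage
// their arguments in the runtime argument registers via this convention and then call
// CodeGeneratorX86_64::InvokeRuntime, roughly:
//
//   InvokeRuntimeCallingConvention calling_convention;
//   // `arg`, `instruction` and `slow_path` are hypothetical; `codegen` is the CodeGeneratorX86_64.
//   codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), arg);
//   codegen->InvokeRuntime(kQuickDeliverException, instruction, slow_path);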
class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};
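// The next two visitors work as a pair: LocationsBuilderX86_64 runs first and attaches a
// LocationSummary (operand and result locations) to every HInstruction, and
// InstructionCodeGeneratorX86_64 then revisits the instructions and emits x86-64 code from
// those locations. A rough sketch for an integer HAdd (illustrative only; the real
// implementation lives in code_generator_x86_64.cc and handles more cases):
//
//   void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
//     LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(add);
//     locations->SetInAt(0, Location::RequiresRegister());
//     locations->SetInAt(1, Location::Any());
//     locations->SetOut(Location::SameAsFirstInput());
//   }
//
//   void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
//     LocationSummary* locations = add->GetLocations();
//     // Emit addl/addq (or addss/addsd) based on the chosen locations.
//   }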
class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
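  // (Context, assumed rather than stated in this header: the emitted check tests the current
  // thread's flags and, when a suspend request is pending, calls the test-suspend runtime
  // entry point through a slow path.)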
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
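// (Assumed from the surrounding declarations: jump tables for HPackedSwitch are emitted in
// the constant area, see LiteralCaseTable() and fixups_to_jump_tables_ below; each fixup
// patches a RIP-relative offset once the final location of the constant area is known.)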
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill.
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Emit a write barrier if:
  // A) emit_null_check is false
  // B) emit_null_check is true, and value is not null.
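  // (The barrier marks the card covering `object` in the card table, whose base is read from
  // the current Thread; `temp` and `card` are caller-provided scratch registers.)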
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when
  // the CC GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordAppImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);
  Label* NewJitRootMethodTypePatch(const DexFile& dex_file,
                                   dex::ProtoIndex proto_index,
                                   Handle<mirror::MethodType> method_type);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed. The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`). This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);

  // Construct address for array access.
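  // (Illustrative note, not stated in this header: with a register `index` the result is
  // roughly Address(obj, index_reg, scale, data_offset); with a constant `index` the scaled
  // constant is folded into the displacement instead.)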
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // Set `force_mfence` to ensure ordering of non-temporal stores (this emits an MFENCE).
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
  // Patches for method type in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_method_type_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_