/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

// Use a local definition to prevent copying mistakes.
// The value is kX86_64PointerSize converted to size_t; the code generator in this
// file expresses its register and spill widths as multiples of this constant.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

// Core and floating-point register tables handed to InvokeDexCallingConvention.
static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

// Element counts of the two tables above.
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

// Core and floating-point register tables (with their element counts) handed to
// InvokeRuntimeCallingConvention.
static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated.  It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };

// X-macro listing intrinsic names: expands to V(name) once per entry.
// NOTE(review): going by the macro name these are the intrinsics without an
// x86-64 implementation; the consumer of this list is not visible in this file.
#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  /* 1.8 */                                    \
  V(UnsafeGetAndAddInt)                        \
  V(UnsafeGetAndAddLong)                       \
  V(UnsafeGetAndSetInt)                        \
  V(UnsafeGetAndSetLong)                       \
  V(UnsafeGetAndSetObject)                     \
  V(MethodHandleInvokeExact)                   \
  V(MethodHandleInvoke)                        \
  /* OpenJDK 11 */                             \
  V(JdkUnsafeGetAndAddInt)                     \
  V(JdkUnsafeGetAndAddLong)                    \
  V(JdkUnsafeGetAndSetInt)                     \
  V(JdkUnsafeGetAndSetLong)                    \
  V(JdkUnsafeGetAndSetObject)

// CallingConvention instantiated with the kRuntimeParameter* register tables and
// kX86_64PointerSize.
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

// CallingConvention instantiated with the kParameter* register tables and
// kX86_64PointerSize.
class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

// Calling convention visitor that tracks a GPR index, an FPR index and a stack
// offset while locations are handed out (the Get*Location members are defined
// out of line).
class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  // Returns the current value of the stack offset counter.
  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

// Field access calling convention for x86-64: every getter returns a fixed register.
class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  // The object is located in RSI.
  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  // The field index is located in RDI.
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  // The return location is RAX regardless of `type`.
  Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return Location::RegisterLocation(RAX);
  }
  // The value to set is in RDX when `is_instance` is true and in RSI otherwise;
  // `type` is not consulted.
  Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance)
      const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  // The FPU location is XMM0 regardless of `type`.
  Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


// Calling convention visitor that owns an InvokeDexCallingConvention instance
// (the Get*Location members are defined out of line).
class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

// Forward declaration: the classes below hold pointers to the code generator,
// which is declared later in this header.
class CodeGeneratorX86_64;

// ParallelMoveResolverWithSwap specialization for x86-64. Keeps a pointer to the
// owning code generator; the move/swap/scratch hooks are defined out of line.
class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  // Exchange helpers, overloaded on operand kind (CPU register, XMM register, or
  // an `int mem` operand).
  // NOTE(review): `mem` is presumably a stack offset — confirm against the definitions.
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};

// HGraphVisitor that declares one Visit<Name> override per concrete instruction
// in the three FOR_EACH_CONCRETE_INSTRUCTION_* lists below.
class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

// Expands to the declaration `void Visit<name>(H<name>* instr) override;`.
// The `super` argument is unused by this expansion.
#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  // Generic fallback: logs at FATAL severity with the instruction's debug name and id.
  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

// InstructionCodeGenerator that declares one Visit<Name> override per concrete
// instruction in the three FOR_EACH_CONCRETE_INSTRUCTION_* lists below, plus the
// helper routines those visitors share.
class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

// Expands to the declaration `void Visit<name>(H<name>* instr) override;`.
// The `super` argument is unused by this expansion.
#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  // Generic fallback: logs at FATAL severity with the instruction's debug name and id.
  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  // Returns the assembler pointer stored in this visitor.
  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Unlike GenerateReferenceLoadOneRegister, this function takes no
  // `maybe_temp` location.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  // The three templates below are parameterized on the label type used for the
  // branch targets.
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

// x86-64 implementation of CodeGenerator. Owns the locations builder, the
// instruction visitor, the parallel move resolver and the assembler, together
// with the patch-info lists declared at the end of the class.
class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  // Word size used by this code generator: kX86_64WordSize.
  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  // Width used for a floating-point register in slow paths: the full SIMD
  // register width when the graph has SIMD, one word otherwise.
  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  //  8 bytes == 1 x86_64 word for each spill
  }

  // Width of a callee-preserved floating-point register: one word.
  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  // Width of a SIMD register: two words.
  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  // Returns the position recorded in the label of `block`.
  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize(CodeAllocator* allocator) override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  // Returns the instruction visitor down-cast to its concrete x86-64 type.
  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Emit a write barrier.
  void MarkGCCard(CpuRegister temp,
                  CpuRegister card,
                  CpuRegister object,
                  CpuRegister value,
                  bool emit_null_check);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  // Looks up the label for `block` in `block_labels_`.
  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  // Sets `block_labels_` from CommonInitializeLabels<Label>().
  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  // Always false: no type is reported as needing two registers on this target.
  bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return false;
  }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Returns the stored offset of the start of the constant area.
  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // By default this emits a locked `addl $0, (%rsp)`: the locked add implementation
  // will avoid serializing device memory, but will touch (but not change) the top
  // of the stack.
  // Pass `force_mfence` = true to emit a real `mfence` instead; this should be used
  // to ensure ordering of non-temporal stores.
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  // Templated on the factory function used to build each linker::LinkerPatch
  // from the entries of `infos`.
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_