/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "stack_map_stream.h"
#include "string_reference.h"
#include "type_reference.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
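  // Illustrative sketch only (not part of this interface): a backend-specific
  // subclass typically implements EmitNativeCode() along these lines, using the
  // save/restore helpers declared above. The assembler macro, entrypoint name
  // and argument moves below are placeholders; real examples live in the
  // architecture-specific code generators.
  //
  //   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
  //     __ Bind(GetEntryLabel());
  //     SaveLiveRegisters(codegen, instruction_->GetLocations());
  //     // ... move arguments into the runtime calling convention ...
  //     codegen->InvokeRuntime(kQuickSomeEntrypoint, instruction_, GetDexPc(), this);
  //     RestoreLiveRegisters(codegen, instruction_->GetLocations());
  //     // ... branch back to GetExitLabel() ...
  //   }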

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
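  // Illustrative usage sketch (hedged; not part of this header): the driver
  // creates the backend with Create() and runs Compile() with a CodeAllocator
  // that provides the output buffer. The `ArrayCodeAllocator` name below is
  // hypothetical.
  //
  //   class ArrayCodeAllocator FINAL : public CodeAllocator {
  //    public:
  //     uint8_t* Allocate(size_t size) OVERRIDE {
  //       memory_.resize(size);
  //       return memory_.data();
  //     }
  //     const std::vector<uint8_t>& GetMemory() const { return memory_; }
  //    private:
  //     std::vector<uint8_t> memory_;
  //   };
  //
  //   std::unique_ptr<CodeGenerator> codegen(
  //       CodeGenerator::Create(graph, instruction_set, isa_features, compiler_options));
  //   ArrayCodeAllocator allocator;
  //   codegen->Compile(&allocator);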
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // ArtMethod
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
            !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
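  // Illustrative sketch (hedged; the details are backend-specific): when
  // implicit null checks are enabled, GenerateNullCheck() elides an explicit
  // compare and instead lets a later memory access on the same object fault,
  // with the faulting PC recorded so the runtime can map it back to the check.
  // A backend emitting, say, an instance field get typically does roughly:
  //
  //   // `LoadFromOffset` stands in for the architecture's load macro.
  //   __ LoadFromOffset(out, obj, field_offset);   // Faults if `obj` is null.
  //   codegen->MaybeRecordImplicitNullCheck(instruction);
  //
  // When implicit checks are disabled, GenerateExplicitNullCheck() emits a
  // compare-and-branch to a throwing slow path instead.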
  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion stack_map_region,
                      MemoryRegion method_info_region,
                      const DexFile::CodeItem& code_item);
  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
  size_t GetNumberOfJitRoots() const {
    return jit_string_roots_.size() + jit_class_roots_.size();
  }

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    Handle<mirror::ObjectArray<mirror::Object>> roots,
                    const uint8_t* roots_data)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeStaticOrDirectRuntimeCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kRuntimeCall:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
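  // Illustrative note (a hedged sketch of how the invoke hooks above are
  // typically combined; the authoritative flow lives in the backends and in
  // code_generator.cc). At locations-building time the argument locations are
  // assigned through an InvokeDexCallingConventionVisitor; at emission time the
  // backend generates the call and moves the result out of the return register:
  //
  //   // Locations-building time (backend's location builder):
  //   CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
  //
  //   // Code-emission time (backend's instruction visitor):
  //   codegen->GenerateStaticOrDirectCall(invoke, temp_location);
  //   codegen->MoveFromReturnRegister(result_location, invoke->GetType());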
 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
        : dex_file(target_dex_file), index(target_index) { }

    const DexFile& dex_file;
    uint32_t index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        allocated_registers_(RegisterSet::Empty()),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
        block_order_(nullptr),
        jit_string_roots_(StringReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        jit_class_roots_(TypeReferenceValueComparator(),
                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.
  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    DCHECK_EQ(jit_string_roots_.size(), 0u);
    DCHECK_EQ(jit_class_roots_.size(), 0u);
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;

  // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
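  // Illustrative usage (a hedged sketch; the slow-path class name below is
  // backend-specific and stands in for whatever deoptimization slow path the
  // backend defines). A backend shares equivalent slow paths via NewSlowPath(),
  // documented next:
  //
  //   SlowPathCode* slow_path =
  //       deopt_slow_paths_.NewSlowPath<SomeDeoptimizationSlowPath>(deoptimize);
  //   // ... emit a conditional branch to slow_path->GetEntryLabel() ...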
  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_