1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 19 20 #include "arch/instruction_set.h" 21 #include "arch/instruction_set_features.h" 22 #include "base/arena_containers.h" 23 #include "base/arena_object.h" 24 #include "base/array_ref.h" 25 #include "base/bit_field.h" 26 #include "base/bit_utils.h" 27 #include "base/enums.h" 28 #include "base/globals.h" 29 #include "base/macros.h" 30 #include "base/memory_region.h" 31 #include "class_root.h" 32 #include "dex/string_reference.h" 33 #include "dex/type_reference.h" 34 #include "graph_visualizer.h" 35 #include "locations.h" 36 #include "nodes.h" 37 #include "oat_quick_method_header.h" 38 #include "optimizing_compiler_stats.h" 39 #include "read_barrier_option.h" 40 #include "stack.h" 41 #include "subtype_check.h" 42 #include "utils/assembler.h" 43 #include "utils/label.h" 44 45 namespace art HIDDEN { 46 47 // Binary encoding of 2^32 for type double. 48 static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); 49 // Binary encoding of 2^31 for type double. 50 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); 51 52 // Minimum value for a primitive integer. 53 static int32_t constexpr kPrimIntMin = 0x80000000; 54 // Minimum value for a primitive long. 55 static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000); 56 57 // Maximum value for a primitive integer. 58 static int32_t constexpr kPrimIntMax = 0x7fffffff; 59 // Maximum value for a primitive long. 60 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); 61 62 static const ReadBarrierOption gCompilerReadBarrierOption = 63 gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; 64 65 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 66 constexpr size_t status_byte_offset = 67 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 68 constexpr uint32_t shifted_visibly_initialized_value = 69 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); 70 constexpr uint32_t shifted_initializing_value = 71 enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte); 72 constexpr uint32_t shifted_initialized_value = 73 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); 74 75 class Assembler; 76 class CodeGenerator; 77 class CompilerOptions; 78 class StackMapStream; 79 class ParallelMoveResolver; 80 81 namespace linker { 82 class LinkerPatch; 83 } // namespace linker 84 85 class CodeAllocator { 86 public: CodeAllocator()87 CodeAllocator() {} ~CodeAllocator()88 virtual ~CodeAllocator() {} 89 90 virtual uint8_t* Allocate(size_t size) = 0; 91 virtual ArrayRef<const uint8_t> GetMemory() const = 0; 92 93 private: 94 DISALLOW_COPY_AND_ASSIGN(CodeAllocator); 95 }; 96 97 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { 98 public: SlowPathCode(HInstruction * instruction)99 explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { 100 for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { 101 saved_core_stack_offsets_[i] = kRegisterNotSaved; 102 saved_fpu_stack_offsets_[i] = kRegisterNotSaved; 103 } 104 } 105 ~SlowPathCode()106 virtual ~SlowPathCode() {} 107 108 virtual void EmitNativeCode(CodeGenerator* codegen) = 0; 109 110 // Save live core and floating-point caller-save registers and 111 // update the stack mask in `locations` for registers holding object 112 // references. 113 virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 114 // Restore live core and floating-point caller-save registers. 115 virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 116 IsCoreRegisterSaved(int reg)117 bool IsCoreRegisterSaved(int reg) const { 118 return saved_core_stack_offsets_[reg] != kRegisterNotSaved; 119 } 120 IsFpuRegisterSaved(int reg)121 bool IsFpuRegisterSaved(int reg) const { 122 return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved; 123 } 124 GetStackOffsetOfCoreRegister(int reg)125 uint32_t GetStackOffsetOfCoreRegister(int reg) const { 126 return saved_core_stack_offsets_[reg]; 127 } 128 GetStackOffsetOfFpuRegister(int reg)129 uint32_t GetStackOffsetOfFpuRegister(int reg) const { 130 return saved_fpu_stack_offsets_[reg]; 131 } 132 IsFatal()133 virtual bool IsFatal() const { return false; } 134 135 virtual const char* GetDescription() const = 0; 136 GetEntryLabel()137 Label* GetEntryLabel() { return &entry_label_; } GetExitLabel()138 Label* GetExitLabel() { return &exit_label_; } 139 GetInstruction()140 HInstruction* GetInstruction() const { 141 return instruction_; 142 } 143 GetDexPc()144 uint32_t GetDexPc() const { 145 return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; 146 } 147 148 protected: 149 static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; 150 static constexpr uint32_t kRegisterNotSaved = -1; 151 // The instruction where this slow path is happening. 152 HInstruction* instruction_; 153 uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 154 uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 155 156 private: 157 Label entry_label_; 158 Label exit_label_; 159 160 DISALLOW_COPY_AND_ASSIGN(SlowPathCode); 161 }; 162 163 class InvokeDexCallingConventionVisitor { 164 public: 165 virtual Location GetNextLocation(DataType::Type type) = 0; 166 virtual Location GetReturnLocation(DataType::Type type) const = 0; 167 virtual Location GetMethodLocation() const = 0; 168 169 protected: InvokeDexCallingConventionVisitor()170 InvokeDexCallingConventionVisitor() {} ~InvokeDexCallingConventionVisitor()171 virtual ~InvokeDexCallingConventionVisitor() {} 172 173 // The current index for core registers. 174 uint32_t gp_index_ = 0u; 175 // The current index for floating-point registers. 176 uint32_t float_index_ = 0u; 177 // The current stack index. 178 uint32_t stack_index_ = 0u; 179 180 private: 181 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); 182 }; 183 184 class FieldAccessCallingConvention { 185 public: 186 virtual Location GetObjectLocation() const = 0; 187 virtual Location GetFieldIndexLocation() const = 0; 188 virtual Location GetReturnLocation(DataType::Type type) const = 0; 189 virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0; 190 virtual Location GetFpuLocation(DataType::Type type) const = 0; ~FieldAccessCallingConvention()191 virtual ~FieldAccessCallingConvention() {} 192 193 protected: FieldAccessCallingConvention()194 FieldAccessCallingConvention() {} 195 196 private: 197 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention); 198 }; 199 200 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { 201 public: 202 // Compiles the graph to executable instructions. 203 void Compile(CodeAllocator* allocator); 204 static std::unique_ptr<CodeGenerator> Create(HGraph* graph, 205 const CompilerOptions& compiler_options, 206 OptimizingCompilerStats* stats = nullptr); 207 virtual ~CodeGenerator(); 208 209 // Get the graph. This is the outermost graph, never the graph of a method being inlined. GetGraph()210 HGraph* GetGraph() const { return graph_; } 211 212 HBasicBlock* GetNextBlockToEmit() const; 213 HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; 214 bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; 215 GetStackSlotOfParameter(HParameterValue * parameter)216 size_t GetStackSlotOfParameter(HParameterValue* parameter) const { 217 // Note that this follows the current calling convention. 218 return GetFrameSize() 219 + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method 220 + parameter->GetIndex() * kVRegSize; 221 } 222 223 virtual void Initialize() = 0; 224 virtual void Finalize(CodeAllocator* allocator); 225 virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); 226 virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; 227 virtual void EmitThunkCode(const linker::LinkerPatch& patch, 228 /*out*/ ArenaVector<uint8_t>* code, 229 /*out*/ std::string* debug_name); 230 virtual void GenerateFrameEntry() = 0; 231 virtual void GenerateFrameExit() = 0; 232 virtual void Bind(HBasicBlock* block) = 0; 233 virtual void MoveConstant(Location destination, int32_t value) = 0; 234 virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0; 235 virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; 236 237 virtual Assembler* GetAssembler() = 0; 238 virtual const Assembler& GetAssembler() const = 0; 239 virtual size_t GetWordSize() const = 0; 240 241 // Returns whether the target supports predicated SIMD instructions. SupportsPredicatedSIMD()242 virtual bool SupportsPredicatedSIMD() const { return false; } 243 244 // Get FP register width in bytes for spilling/restoring in the slow paths. 245 // 246 // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers 247 // alias and live SIMD registers are forced to be spilled in full size in the slow paths. GetSlowPathFPWidth()248 virtual size_t GetSlowPathFPWidth() const { 249 // Default implementation. 250 return GetCalleePreservedFPWidth(); 251 } 252 253 // Get FP register width required to be preserved by the target ABI. 254 virtual size_t GetCalleePreservedFPWidth() const = 0; 255 256 // Get the size of the target SIMD register in bytes. 257 virtual size_t GetSIMDRegisterWidth() const = 0; 258 virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; 259 void InitializeCodeGeneration(size_t number_of_spill_slots, 260 size_t maximum_safepoint_spill_size, 261 size_t number_of_out_slots, 262 const ArenaVector<HBasicBlock*>& block_order); 263 // Backends can override this as necessary. For most, no special alignment is required. GetPreferredSlotsAlignment()264 virtual uint32_t GetPreferredSlotsAlignment() const { return 1; } 265 GetFrameSize()266 uint32_t GetFrameSize() const { return frame_size_; } SetFrameSize(uint32_t size)267 void SetFrameSize(uint32_t size) { frame_size_ = size; } GetCoreSpillMask()268 uint32_t GetCoreSpillMask() const { return core_spill_mask_; } GetFpuSpillMask()269 uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } 270 GetNumberOfCoreRegisters()271 size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } GetNumberOfFloatingPointRegisters()272 size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } 273 virtual void SetupBlockedRegisters() const = 0; 274 ComputeSpillMask()275 virtual void ComputeSpillMask() { 276 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; 277 DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; 278 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; 279 } 280 ComputeRegisterMask(const int * registers,size_t length)281 static uint32_t ComputeRegisterMask(const int* registers, size_t length) { 282 uint32_t mask = 0; 283 for (size_t i = 0, e = length; i < e; ++i) { 284 mask |= (1 << registers[i]); 285 } 286 return mask; 287 } 288 289 virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; 290 virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; 291 virtual InstructionSet GetInstructionSet() const = 0; 292 GetCompilerOptions()293 const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } 294 295 // Saves the register in the stack. Returns the size taken on stack. 296 virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 297 // Restores the register from the stack. Returns the size taken on stack. 298 virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 299 300 virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 301 virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 302 303 virtual bool NeedsTwoRegisters(DataType::Type type) const = 0; 304 // Returns whether we should split long moves in parallel moves. ShouldSplitLongMoves()305 virtual bool ShouldSplitLongMoves() const { return false; } 306 307 // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. IsImplementedIntrinsic(HInvoke * invoke)308 bool IsImplementedIntrinsic(HInvoke* invoke) const { 309 return invoke->IsIntrinsic() && 310 !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; 311 } 312 GetNumberOfCoreCalleeSaveRegisters()313 size_t GetNumberOfCoreCalleeSaveRegisters() const { 314 return POPCOUNT(core_callee_save_mask_); 315 } 316 GetNumberOfCoreCallerSaveRegisters()317 size_t GetNumberOfCoreCallerSaveRegisters() const { 318 DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); 319 return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); 320 } 321 IsCoreCalleeSaveRegister(int reg)322 bool IsCoreCalleeSaveRegister(int reg) const { 323 return (core_callee_save_mask_ & (1 << reg)) != 0; 324 } 325 IsFloatingPointCalleeSaveRegister(int reg)326 bool IsFloatingPointCalleeSaveRegister(int reg) const { 327 return (fpu_callee_save_mask_ & (1 << reg)) != 0; 328 } 329 GetSlowPathSpills(LocationSummary * locations,bool core_registers)330 uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { 331 DCHECK(locations->OnlyCallsOnSlowPath() || 332 (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && 333 !locations->HasCustomSlowPathCallingConvention())); 334 uint32_t live_registers = core_registers 335 ? locations->GetLiveRegisters()->GetCoreRegisters() 336 : locations->GetLiveRegisters()->GetFloatingPointRegisters(); 337 if (locations->HasCustomSlowPathCallingConvention()) { 338 // Save only the live registers that the custom calling convention wants us to save. 339 uint32_t caller_saves = core_registers 340 ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() 341 : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); 342 return live_registers & caller_saves; 343 } else { 344 // Default ABI, we need to spill non-callee-save live registers. 345 uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; 346 return live_registers & ~callee_saves; 347 } 348 } 349 GetNumberOfSlowPathSpills(LocationSummary * locations,bool core_registers)350 size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { 351 return POPCOUNT(GetSlowPathSpills(locations, core_registers)); 352 } 353 GetStackOffsetOfShouldDeoptimizeFlag()354 size_t GetStackOffsetOfShouldDeoptimizeFlag() const { 355 DCHECK(GetGraph()->HasShouldDeoptimizeFlag()); 356 DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize); 357 return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; 358 } 359 360 // Record native to dex mapping for a suspend point. Required by runtime. 361 void RecordPcInfo(HInstruction* instruction, 362 uint32_t dex_pc, 363 uint32_t native_pc, 364 SlowPathCode* slow_path = nullptr, 365 bool native_debug_info = false); 366 367 // Record native to dex mapping for a suspend point. 368 // The native_pc is used from Assembler::CodePosition. 369 // 370 // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc 371 // for the instruction. If the exact native_pc is required it must be provided explicitly. 372 void RecordPcInfo(HInstruction* instruction, 373 uint32_t dex_pc, 374 SlowPathCode* slow_path = nullptr, 375 bool native_debug_info = false); 376 377 // Check whether we have already recorded mapping at this PC. 378 bool HasStackMapAtCurrentPc(); 379 380 // Record extra stack maps if we support native debugging. 381 // 382 // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions 383 // corresponding the dex PC. 384 void MaybeRecordNativeDebugInfo(HInstruction* instruction, 385 uint32_t dex_pc, 386 SlowPathCode* slow_path = nullptr); 387 388 bool CanMoveNullCheckToUser(HNullCheck* null_check); 389 virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction); 390 LocationSummary* CreateThrowingSlowPathLocations( 391 HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); 392 void GenerateNullCheck(HNullCheck* null_check); 393 virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0; 394 virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0; 395 396 // Records a stack map which the runtime might use to set catch phi values 397 // during exception delivery. 398 // TODO: Replace with a catch-entering instruction that records the environment. 399 void RecordCatchBlockInfo(); 400 401 // Get the ScopedArenaAllocator used for codegen memory allocation. 402 ScopedArenaAllocator* GetScopedAllocator(); 403 404 void AddSlowPath(SlowPathCode* slow_path); 405 406 ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); 407 size_t GetNumberOfJitRoots() const; 408 409 // Fills the `literals` array with literals collected during code generation. 410 // Also emits literal patches. 411 void EmitJitRoots(uint8_t* code, 412 const uint8_t* roots_data, 413 /*out*/std::vector<Handle<mirror::Object>>* roots) 414 REQUIRES_SHARED(Locks::mutator_lock_); 415 IsLeafMethod()416 bool IsLeafMethod() const { 417 return is_leaf_; 418 } 419 MarkNotLeaf()420 void MarkNotLeaf() { 421 is_leaf_ = false; 422 requires_current_method_ = true; 423 } 424 NeedsSuspendCheckEntry()425 bool NeedsSuspendCheckEntry() const { 426 return needs_suspend_check_entry_; 427 } 428 MarkNeedsSuspendCheckEntry()429 void MarkNeedsSuspendCheckEntry() { 430 needs_suspend_check_entry_ = true; 431 } 432 SetRequiresCurrentMethod()433 void SetRequiresCurrentMethod() { 434 requires_current_method_ = true; 435 } 436 RequiresCurrentMethod()437 bool RequiresCurrentMethod() const { 438 return requires_current_method_; 439 } 440 441 // Clears the spill slots taken by loop phis in the `LocationSummary` of the 442 // suspend check. This is called when the code generator generates code 443 // for the suspend check at the back edge (instead of where the suspend check 444 // is, which is the loop entry). At this point, the spill slots for the phis 445 // have not been written to. 446 void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, 447 HParallelMove* spills) const; 448 GetBlockedCoreRegisters()449 bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } GetBlockedFloatingPointRegisters()450 bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } 451 IsBlockedCoreRegister(size_t i)452 bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } IsBlockedFloatingPointRegister(size_t i)453 bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } 454 455 // Helper that returns the offset of the array's length field. 456 // Note: Besides the normal arrays, we also use the HArrayLength for 457 // accessing the String's `count` field in String intrinsics. 458 static uint32_t GetArrayLengthOffset(HArrayLength* array_length); 459 460 // Helper that returns the offset of the array's data. 461 // Note: Besides the normal arrays, we also use the HArrayGet for 462 // accessing the String's `value` field in String intrinsics. 463 static uint32_t GetArrayDataOffset(HArrayGet* array_get); 464 465 void EmitParallelMoves(Location from1, 466 Location to1, 467 DataType::Type type1, 468 Location from2, 469 Location to2, 470 DataType::Type type2); 471 InstanceOfNeedsReadBarrier(HInstanceOf * instance_of)472 static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { 473 // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck. 474 DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || 475 instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || 476 instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || 477 instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck) 478 << instance_of->GetTypeCheckKind(); 479 // If the target class is in the boot image, it's non-moveable and it doesn't matter 480 // if we compare it with a from-space or to-space reference, the result is the same. 481 // It's OK to traverse a class hierarchy jumping between from-space and to-space. 482 return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); 483 } 484 ReadBarrierOptionForInstanceOf(HInstanceOf * instance_of)485 static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { 486 return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier; 487 } 488 IsTypeCheckSlowPathFatal(HCheckCast * check_cast)489 static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { 490 switch (check_cast->GetTypeCheckKind()) { 491 case TypeCheckKind::kExactCheck: 492 case TypeCheckKind::kAbstractClassCheck: 493 case TypeCheckKind::kClassHierarchyCheck: 494 case TypeCheckKind::kArrayObjectCheck: 495 case TypeCheckKind::kInterfaceCheck: { 496 bool needs_read_barrier = 497 gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); 498 // We do not emit read barriers for HCheckCast, so we can get false negatives 499 // and the slow path shall re-check and simply return if the cast is actually OK. 500 return !needs_read_barrier; 501 } 502 case TypeCheckKind::kArrayCheck: 503 case TypeCheckKind::kUnresolvedCheck: 504 return false; 505 case TypeCheckKind::kBitstringCheck: 506 return true; 507 } 508 LOG(FATAL) << "Unreachable"; 509 UNREACHABLE(); 510 } 511 GetCheckCastCallKind(HCheckCast * check_cast)512 static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { 513 return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) 514 ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. 515 : LocationSummary::kCallOnSlowPath; 516 } 517 StoreNeedsWriteBarrier(DataType::Type type,HInstruction * value)518 static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) { 519 // Check that null value is not represented as an integer constant. 520 DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant()); 521 return type == DataType::Type::kReference && !value->IsNullConstant(); 522 } 523 524 525 // Performs checks pertaining to an InvokeRuntime call. 526 void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, 527 HInstruction* instruction, 528 SlowPathCode* slow_path); 529 530 // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. 531 static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, 532 SlowPathCode* slow_path); 533 AddAllocatedRegister(Location location)534 void AddAllocatedRegister(Location location) { 535 allocated_registers_.Add(location); 536 } 537 HasAllocatedRegister(bool is_core,int reg)538 bool HasAllocatedRegister(bool is_core, int reg) const { 539 return is_core 540 ? allocated_registers_.ContainsCoreRegister(reg) 541 : allocated_registers_.ContainsFloatingPointRegister(reg); 542 } 543 544 void AllocateLocations(HInstruction* instruction); 545 546 // Tells whether the stack frame of the compiled method is 547 // considered "empty", that is either actually having a size of zero, 548 // or just containing the saved return address register. HasEmptyFrame()549 bool HasEmptyFrame() const { 550 return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); 551 } 552 GetInt8ValueOf(HConstant * constant)553 static int8_t GetInt8ValueOf(HConstant* constant) { 554 DCHECK(constant->IsIntConstant()); 555 return constant->AsIntConstant()->GetValue(); 556 } 557 GetInt16ValueOf(HConstant * constant)558 static int16_t GetInt16ValueOf(HConstant* constant) { 559 DCHECK(constant->IsIntConstant()); 560 return constant->AsIntConstant()->GetValue(); 561 } 562 GetInt32ValueOf(HConstant * constant)563 static int32_t GetInt32ValueOf(HConstant* constant) { 564 if (constant->IsIntConstant()) { 565 return constant->AsIntConstant()->GetValue(); 566 } else if (constant->IsNullConstant()) { 567 return 0; 568 } else { 569 DCHECK(constant->IsFloatConstant()); 570 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 571 } 572 } 573 GetInt64ValueOf(HConstant * constant)574 static int64_t GetInt64ValueOf(HConstant* constant) { 575 if (constant->IsIntConstant()) { 576 return constant->AsIntConstant()->GetValue(); 577 } else if (constant->IsNullConstant()) { 578 return 0; 579 } else if (constant->IsFloatConstant()) { 580 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 581 } else if (constant->IsLongConstant()) { 582 return constant->AsLongConstant()->GetValue(); 583 } else { 584 DCHECK(constant->IsDoubleConstant()); 585 return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); 586 } 587 } 588 GetFirstRegisterSlotInSlowPath()589 size_t GetFirstRegisterSlotInSlowPath() const { 590 return first_register_slot_in_slow_path_; 591 } 592 FrameEntrySpillSize()593 uint32_t FrameEntrySpillSize() const { 594 return GetFpuSpillSize() + GetCoreSpillSize(); 595 } 596 597 virtual ParallelMoveResolver* GetMoveResolver() = 0; 598 599 static void CreateCommonInvokeLocationSummary( 600 HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); 601 602 template <typename CriticalNativeCallingConventionVisitor, 603 size_t kNativeStackAlignment, 604 size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)> PrepareCriticalNativeCall(HInvokeStaticOrDirect * invoke)605 size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) { 606 DCHECK(!invoke->GetLocations()->Intrinsified()); 607 CriticalNativeCallingConventionVisitor calling_convention_visitor( 608 /*for_register_allocation=*/ false); 609 HParallelMove parallel_move(GetGraph()->GetAllocator()); 610 PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, ¶llel_move); 611 size_t out_frame_size = 612 RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment); 613 if (kIsDebugBuild) { 614 uint32_t shorty_len; 615 const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len); 616 DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size); 617 } 618 if (out_frame_size != 0u) { 619 FinishCriticalNativeFrameSetup(out_frame_size, ¶llel_move); 620 } 621 return out_frame_size; 622 } 623 624 void GenerateInvokeStaticOrDirectRuntimeCall( 625 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); 626 627 void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); 628 629 void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr); 630 631 void GenerateInvokeCustomCall(HInvokeCustom* invoke); 632 633 void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out); 634 635 void CreateUnresolvedFieldLocationSummary( 636 HInstruction* field_access, 637 DataType::Type field_type, 638 const FieldAccessCallingConvention& calling_convention); 639 640 void GenerateUnresolvedFieldAccess( 641 HInstruction* field_access, 642 DataType::Type field_type, 643 uint32_t field_index, 644 uint32_t dex_pc, 645 const FieldAccessCallingConvention& calling_convention); 646 647 static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, 648 Location runtime_type_index_location, 649 Location runtime_return_location); 650 void GenerateLoadClassRuntimeCall(HLoadClass* cls); 651 652 static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, 653 Location runtime_handle_index_location, 654 Location runtime_return_location); 655 void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); 656 657 static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, 658 Location runtime_type_index_location, 659 Location runtime_return_location); 660 void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); 661 662 static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object) 663 REQUIRES_SHARED(Locks::mutator_lock_); 664 static uint32_t GetBootImageOffset(HLoadClass* load_class); 665 static uint32_t GetBootImageOffset(HLoadString* load_string); 666 static uint32_t GetBootImageOffset(HInvoke* invoke); 667 static uint32_t GetBootImageOffset(ClassRoot class_root); 668 static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke); 669 670 static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); 671 SetDisassemblyInformation(DisassemblyInformation * info)672 void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } GetDisassemblyInformation()673 DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } 674 675 virtual void InvokeRuntime(QuickEntrypointEnum entrypoint, 676 HInstruction* instruction, 677 uint32_t dex_pc, 678 SlowPathCode* slow_path = nullptr) = 0; 679 680 // Check if the desired_string_load_kind is supported. If it is, return it, 681 // otherwise return a fall-back kind that should be used instead. 682 virtual HLoadString::LoadKind GetSupportedLoadStringKind( 683 HLoadString::LoadKind desired_string_load_kind) = 0; 684 685 // Check if the desired_class_load_kind is supported. If it is, return it, 686 // otherwise return a fall-back kind that should be used instead. 687 virtual HLoadClass::LoadKind GetSupportedLoadClassKind( 688 HLoadClass::LoadKind desired_class_load_kind) = 0; 689 GetLoadStringCallKind(HLoadString * load)690 static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { 691 switch (load->GetLoadKind()) { 692 case HLoadString::LoadKind::kBssEntry: 693 DCHECK(load->NeedsEnvironment()); 694 return LocationSummary::kCallOnSlowPath; 695 case HLoadString::LoadKind::kRuntimeCall: 696 DCHECK(load->NeedsEnvironment()); 697 return LocationSummary::kCallOnMainOnly; 698 case HLoadString::LoadKind::kJitTableAddress: 699 DCHECK(!load->NeedsEnvironment()); 700 return gUseReadBarrier 701 ? LocationSummary::kCallOnSlowPath 702 : LocationSummary::kNoCall; 703 break; 704 default: 705 DCHECK(!load->NeedsEnvironment()); 706 return LocationSummary::kNoCall; 707 } 708 } 709 710 // Check if the desired_dispatch_info is supported. If it is, return it, 711 // otherwise return a fall-back info that should be used instead. 712 virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 713 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 714 ArtMethod* method) = 0; 715 716 // Generate a call to a static or direct method. 717 virtual void GenerateStaticOrDirectCall( 718 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 719 // Generate a call to a virtual method. 720 virtual void GenerateVirtualCall( 721 HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 722 723 // Copy the result of a call into the given target. 724 virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0; 725 726 virtual void IncreaseFrame(size_t adjustment) = 0; 727 virtual void DecreaseFrame(size_t adjustment) = 0; 728 729 virtual void GenerateNop() = 0; 730 731 static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); 732 static ScaleFactor ScaleFactorForType(DataType::Type type); 733 734 protected: 735 // Patch info used for recording locations of required linker patches and their targets, 736 // i.e. target method, string, type or code identified by their dex file and index, 737 // or .data.bimg.rel.ro entries identified by the boot image offset. 738 template <typename LabelType> 739 struct PatchInfo { PatchInfoPatchInfo740 PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) 741 : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } 742 743 // Target dex file or null for .data.bmig.rel.ro patches. 744 const DexFile* target_dex_file; 745 // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index. 746 uint32_t offset_or_index; 747 // Label for the instruction to patch. 748 LabelType label; 749 }; 750 751 CodeGenerator(HGraph* graph, 752 size_t number_of_core_registers, 753 size_t number_of_fpu_registers, 754 size_t number_of_register_pairs, 755 uint32_t core_callee_save_mask, 756 uint32_t fpu_callee_save_mask, 757 const CompilerOptions& compiler_options, 758 OptimizingCompilerStats* stats, 759 const art::ArrayRef<const bool>& unimplemented_intrinsics); 760 761 virtual HGraphVisitor* GetLocationBuilder() = 0; 762 virtual HGraphVisitor* GetInstructionVisitor() = 0; 763 764 // Returns the location of the first spilled entry for floating point registers, 765 // relative to the stack pointer. GetFpuSpillStart()766 uint32_t GetFpuSpillStart() const { 767 return GetFrameSize() - FrameEntrySpillSize(); 768 } 769 GetFpuSpillSize()770 uint32_t GetFpuSpillSize() const { 771 return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth(); 772 } 773 GetCoreSpillSize()774 uint32_t GetCoreSpillSize() const { 775 return POPCOUNT(core_spill_mask_) * GetWordSize(); 776 } 777 HasAllocatedCalleeSaveRegisters()778 virtual bool HasAllocatedCalleeSaveRegisters() const { 779 // We check the core registers against 1 because it always comprises the return PC. 780 return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) 781 || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); 782 } 783 CallPushesPC()784 bool CallPushesPC() const { 785 InstructionSet instruction_set = GetInstructionSet(); 786 return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; 787 } 788 789 // Arm64 has its own type for a label, so we need to templatize these methods 790 // to share the logic. 791 792 template <typename LabelType> CommonInitializeLabels()793 LabelType* CommonInitializeLabels() { 794 // We use raw array allocations instead of ArenaVector<> because Labels are 795 // non-constructible and non-movable and as such cannot be held in a vector. 796 size_t size = GetGraph()->GetBlocks().size(); 797 LabelType* labels = 798 GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator); 799 for (size_t i = 0; i != size; ++i) { 800 new(labels + i) LabelType(); 801 } 802 return labels; 803 } 804 805 template <typename LabelType> CommonGetLabelOf(LabelType * raw_pointer_to_labels_array,HBasicBlock * block)806 LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { 807 block = FirstNonEmptyBlock(block); 808 return raw_pointer_to_labels_array + block->GetBlockId(); 809 } 810 GetCurrentSlowPath()811 SlowPathCode* GetCurrentSlowPath() { 812 return current_slow_path_; 813 } 814 815 StackMapStream* GetStackMapStream(); 816 817 void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); 818 uint64_t GetJitStringRootIndex(StringReference string_reference); 819 void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); 820 uint64_t GetJitClassRootIndex(TypeReference type_reference); 821 822 // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code. 823 virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data); 824 825 // Frame size required for this method. 826 uint32_t frame_size_; 827 uint32_t core_spill_mask_; 828 uint32_t fpu_spill_mask_; 829 uint32_t first_register_slot_in_slow_path_; 830 831 // Registers that were allocated during linear scan. 832 RegisterSet allocated_registers_; 833 834 // Arrays used when doing register allocation to know which 835 // registers we can allocate. `SetupBlockedRegisters` updates the 836 // arrays. 837 bool* const blocked_core_registers_; 838 bool* const blocked_fpu_registers_; 839 size_t number_of_core_registers_; 840 size_t number_of_fpu_registers_; 841 size_t number_of_register_pairs_; 842 const uint32_t core_callee_save_mask_; 843 const uint32_t fpu_callee_save_mask_; 844 845 // The order to use for code generation. 846 const ArenaVector<HBasicBlock*>* block_order_; 847 848 DisassemblyInformation* disasm_info_; 849 850 private: 851 class CodeGenerationData; 852 853 void InitializeCodeGenerationData(); 854 size_t GetStackOffsetOfSavedRegister(size_t index); 855 void GenerateSlowPaths(); 856 void BlockIfInRegister(Location location, bool is_out = false) const; 857 void EmitEnvironment(HEnvironment* environment, 858 SlowPathCode* slow_path, 859 bool needs_vreg_info = true, 860 bool is_for_catch_handler = false, 861 bool innermost_environment = true); 862 void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler); 863 void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment); 864 865 static void PrepareCriticalNativeArgumentMoves( 866 HInvokeStaticOrDirect* invoke, 867 /*inout*/InvokeDexCallingConventionVisitor* visitor, 868 /*out*/HParallelMove* parallel_move); 869 870 void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move); 871 872 static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len); 873 874 OptimizingCompilerStats* stats_; 875 876 HGraph* const graph_; 877 const CompilerOptions& compiler_options_; 878 879 // The current slow-path that we're generating code for. 880 SlowPathCode* current_slow_path_; 881 882 // The current block index in `block_order_` of the block 883 // we are generating code for. 884 size_t current_block_index_; 885 886 // Whether the method is a leaf method. 887 bool is_leaf_; 888 889 // Whether the method has to emit a SuspendCheck at entry. 890 bool needs_suspend_check_entry_; 891 892 // Whether an instruction in the graph accesses the current method. 893 // TODO: Rename: this actually indicates that some instruction in the method 894 // needs the environment including a valid stack frame. 895 bool requires_current_method_; 896 897 // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the 898 // ArenaStack memory allocated in previous passes instead of adding to the memory 899 // held by the ArenaAllocator. This ScopedArenaAllocator is created in 900 // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. 901 std::unique_ptr<CodeGenerationData> code_generation_data_; 902 903 // Which intrinsics we don't have handcrafted code for. 904 art::ArrayRef<const bool> unimplemented_intrinsics_; 905 906 friend class OptimizingCFITest; 907 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); 908 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); 909 910 DISALLOW_COPY_AND_ASSIGN(CodeGenerator); 911 }; 912 913 template <typename C, typename F> 914 class CallingConvention { 915 public: CallingConvention(const C * registers,size_t number_of_registers,const F * fpu_registers,size_t number_of_fpu_registers,PointerSize pointer_size)916 CallingConvention(const C* registers, 917 size_t number_of_registers, 918 const F* fpu_registers, 919 size_t number_of_fpu_registers, 920 PointerSize pointer_size) 921 : registers_(registers), 922 number_of_registers_(number_of_registers), 923 fpu_registers_(fpu_registers), 924 number_of_fpu_registers_(number_of_fpu_registers), 925 pointer_size_(pointer_size) {} 926 GetNumberOfRegisters()927 size_t GetNumberOfRegisters() const { return number_of_registers_; } GetNumberOfFpuRegisters()928 size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } 929 GetRegisterAt(size_t index)930 C GetRegisterAt(size_t index) const { 931 DCHECK_LT(index, number_of_registers_); 932 return registers_[index]; 933 } 934 GetFpuRegisterAt(size_t index)935 F GetFpuRegisterAt(size_t index) const { 936 DCHECK_LT(index, number_of_fpu_registers_); 937 return fpu_registers_[index]; 938 } 939 GetStackOffsetOf(size_t index)940 size_t GetStackOffsetOf(size_t index) const { 941 // We still reserve the space for parameters passed by registers. 942 // Add space for the method pointer. 943 return static_cast<size_t>(pointer_size_) + index * kVRegSize; 944 } 945 946 private: 947 const C* registers_; 948 const size_t number_of_registers_; 949 const F* fpu_registers_; 950 const size_t number_of_fpu_registers_; 951 const PointerSize pointer_size_; 952 953 DISALLOW_COPY_AND_ASSIGN(CallingConvention); 954 }; 955 956 /** 957 * A templated class SlowPathGenerator with a templated method NewSlowPath() 958 * that can be used by any code generator to share equivalent slow-paths with 959 * the objective of reducing generated code size. 960 * 961 * InstructionType: instruction that requires SlowPathCodeType 962 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) 963 */ 964 template <typename InstructionType> 965 class SlowPathGenerator { 966 static_assert(std::is_base_of<HInstruction, InstructionType>::value, 967 "InstructionType is not a subclass of art::HInstruction"); 968 969 public: SlowPathGenerator(HGraph * graph,CodeGenerator * codegen)970 SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) 971 : graph_(graph), 972 codegen_(codegen), 973 slow_path_map_(std::less<uint32_t>(), 974 graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {} 975 976 // Creates and adds a new slow-path, if needed, or returns existing one otherwise. 977 // Templating the method (rather than the whole class) on the slow-path type enables 978 // keeping this code at a generic, non architecture-specific place. 979 // 980 // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. 981 // To relax this requirement, we would need some RTTI on the stored slow-paths, 982 // or template the class as a whole on SlowPathType. 983 template <typename SlowPathCodeType> NewSlowPath(InstructionType * instruction)984 SlowPathCodeType* NewSlowPath(InstructionType* instruction) { 985 static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, 986 "SlowPathCodeType is not a subclass of art::SlowPathCode"); 987 static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, 988 "SlowPathCodeType is not constructible from InstructionType*"); 989 // Iterate over potential candidates for sharing. Currently, only same-typed 990 // slow-paths with exactly the same dex-pc are viable candidates. 991 // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? 992 const uint32_t dex_pc = instruction->GetDexPc(); 993 auto iter = slow_path_map_.find(dex_pc); 994 if (iter != slow_path_map_.end()) { 995 const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; 996 for (const auto& it : candidates) { 997 InstructionType* other_instruction = it.first; 998 SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); 999 // Determine if the instructions allow for slow-path sharing. 1000 if (HaveSameLiveRegisters(instruction, other_instruction) && 1001 HaveSameStackMap(instruction, other_instruction)) { 1002 // Can share: reuse existing one. 1003 return other_slow_path; 1004 } 1005 } 1006 } else { 1007 // First time this dex-pc is seen. 1008 iter = slow_path_map_.Put(dex_pc, 1009 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}}); 1010 } 1011 // Cannot share: create and add new slow-path for this particular dex-pc. 1012 SlowPathCodeType* slow_path = 1013 new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction); 1014 iter->second.emplace_back(std::make_pair(instruction, slow_path)); 1015 codegen_->AddSlowPath(slow_path); 1016 return slow_path; 1017 } 1018 1019 private: 1020 // Tests if both instructions have same set of live physical registers. This ensures 1021 // the slow-path has exactly the same preamble on saving these registers to stack. HaveSameLiveRegisters(const InstructionType * i1,const InstructionType * i2)1022 bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { 1023 const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); 1024 const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); 1025 RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); 1026 RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); 1027 return (((live1->GetCoreRegisters() & core_spill) == 1028 (live2->GetCoreRegisters() & core_spill)) && 1029 ((live1->GetFloatingPointRegisters() & fpu_spill) == 1030 (live2->GetFloatingPointRegisters() & fpu_spill))); 1031 } 1032 1033 // Tests if both instructions have the same stack map. This ensures the interpreter 1034 // will find exactly the same dex-registers at the same entries. HaveSameStackMap(const InstructionType * i1,const InstructionType * i2)1035 bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { 1036 DCHECK(i1->HasEnvironment()); 1037 DCHECK(i2->HasEnvironment()); 1038 // We conservatively test if the two instructions find exactly the same instructions 1039 // and location in each dex-register. This guarantees they will have the same stack map. 1040 HEnvironment* e1 = i1->GetEnvironment(); 1041 HEnvironment* e2 = i2->GetEnvironment(); 1042 if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { 1043 return false; 1044 } 1045 for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { 1046 if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || 1047 !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { 1048 return false; 1049 } 1050 } 1051 return true; 1052 } 1053 1054 HGraph* const graph_; 1055 CodeGenerator* const codegen_; 1056 1057 // Map from dex-pc to vector of already existing instruction/slow-path pairs. 1058 ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; 1059 1060 DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); 1061 }; 1062 1063 class InstructionCodeGenerator : public HGraphVisitor { 1064 public: InstructionCodeGenerator(HGraph * graph,CodeGenerator * codegen)1065 InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) 1066 : HGraphVisitor(graph), 1067 deopt_slow_paths_(graph, codegen) {} 1068 1069 protected: 1070 // Add slow-path generator for each instruction/slow-path combination that desires sharing. 1071 // TODO: under current regime, only deopt sharing make sense; extend later. 1072 SlowPathGenerator<HDeoptimize> deopt_slow_paths_; 1073 }; 1074 1075 } // namespace art 1076 1077 #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 1078