1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 19 20 #include "arch/instruction_set.h" 21 #include "arch/instruction_set_features.h" 22 #include "base/arena_containers.h" 23 #include "base/arena_object.h" 24 #include "base/array_ref.h" 25 #include "base/bit_field.h" 26 #include "base/bit_utils.h" 27 #include "base/globals.h" 28 #include "base/macros.h" 29 #include "base/memory_region.h" 30 #include "base/pointer_size.h" 31 #include "class_root.h" 32 #include "dex/proto_reference.h" 33 #include "dex/string_reference.h" 34 #include "dex/type_reference.h" 35 #include "graph_visualizer.h" 36 #include "locations.h" 37 #include "mirror/method_type.h" 38 #include "nodes.h" 39 #include "oat/oat_quick_method_header.h" 40 #include "optimizing_compiler_stats.h" 41 #include "read_barrier_option.h" 42 #include "stack.h" 43 #include "subtype_check.h" 44 #include "utils/assembler.h" 45 #include "utils/label.h" 46 47 namespace art HIDDEN { 48 49 // Binary encoding of 2^32 for type double. 50 static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); 51 // Binary encoding of 2^31 for type double. 52 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); 53 54 // Minimum value for a primitive integer. 
55 static int32_t constexpr kPrimIntMin = 0x80000000; 56 // Minimum value for a primitive long. 57 static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000); 58 59 // Maximum value for a primitive integer. 60 static int32_t constexpr kPrimIntMax = 0x7fffffff; 61 // Maximum value for a primitive long. 62 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); 63 64 constexpr size_t kClassStatusLsbPosition = SubtypeCheckBits::BitStructSizeOf(); 65 constexpr size_t kClassStatusByteOffset = 66 mirror::Class::StatusOffset().SizeValue() + (kClassStatusLsbPosition / kBitsPerByte); 67 constexpr uint32_t kShiftedVisiblyInitializedValue = enum_cast<uint32_t>( 68 ClassStatus::kVisiblyInitialized) << (kClassStatusLsbPosition % kBitsPerByte); 69 constexpr uint32_t kShiftedInitializingValue = 70 enum_cast<uint32_t>(ClassStatus::kInitializing) << (kClassStatusLsbPosition % kBitsPerByte); 71 constexpr uint32_t kShiftedInitializedValue = 72 enum_cast<uint32_t>(ClassStatus::kInitialized) << (kClassStatusLsbPosition % kBitsPerByte); 73 74 class Assembler; 75 class CodeGenerationData; 76 class CodeGenerator; 77 class CompilerOptions; 78 class StackMapStream; 79 class ParallelMoveResolver; 80 81 namespace linker { 82 class LinkerPatch; 83 } // namespace linker 84 85 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { 86 public: SlowPathCode(HInstruction * instruction)87 explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { 88 for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { 89 saved_core_stack_offsets_[i] = kRegisterNotSaved; 90 saved_fpu_stack_offsets_[i] = kRegisterNotSaved; 91 } 92 } 93 ~SlowPathCode()94 virtual ~SlowPathCode() {} 95 96 virtual void EmitNativeCode(CodeGenerator* codegen) = 0; 97 98 // Save live core and floating-point caller-save registers and 99 // update the stack mask in `locations` for registers holding object 100 // references. 
101 virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 102 // Restore live core and floating-point caller-save registers. 103 virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); 104 IsCoreRegisterSaved(int reg)105 bool IsCoreRegisterSaved(int reg) const { 106 return saved_core_stack_offsets_[reg] != kRegisterNotSaved; 107 } 108 IsFpuRegisterSaved(int reg)109 bool IsFpuRegisterSaved(int reg) const { 110 return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved; 111 } 112 GetStackOffsetOfCoreRegister(int reg)113 uint32_t GetStackOffsetOfCoreRegister(int reg) const { 114 return saved_core_stack_offsets_[reg]; 115 } 116 GetStackOffsetOfFpuRegister(int reg)117 uint32_t GetStackOffsetOfFpuRegister(int reg) const { 118 return saved_fpu_stack_offsets_[reg]; 119 } 120 IsFatal()121 virtual bool IsFatal() const { return false; } 122 123 virtual const char* GetDescription() const = 0; 124 GetEntryLabel()125 Label* GetEntryLabel() { return &entry_label_; } GetExitLabel()126 Label* GetExitLabel() { return &exit_label_; } 127 GetInstruction()128 HInstruction* GetInstruction() const { 129 return instruction_; 130 } 131 GetDexPc()132 uint32_t GetDexPc() const { 133 return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; 134 } 135 136 protected: 137 static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; 138 static constexpr uint32_t kRegisterNotSaved = -1; 139 // The instruction where this slow path is happening. 
140 HInstruction* instruction_; 141 uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 142 uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; 143 144 private: 145 Label entry_label_; 146 Label exit_label_; 147 148 DISALLOW_COPY_AND_ASSIGN(SlowPathCode); 149 }; 150 151 class InvokeDexCallingConventionVisitor { 152 public: 153 virtual Location GetNextLocation(DataType::Type type) = 0; 154 virtual Location GetReturnLocation(DataType::Type type) const = 0; 155 virtual Location GetMethodLocation() const = 0; 156 157 protected: InvokeDexCallingConventionVisitor()158 InvokeDexCallingConventionVisitor() {} ~InvokeDexCallingConventionVisitor()159 virtual ~InvokeDexCallingConventionVisitor() {} 160 161 // The current index for core registers. 162 uint32_t gp_index_ = 0u; 163 // The current index for floating-point registers. 164 uint32_t float_index_ = 0u; 165 // The current stack index. 166 uint32_t stack_index_ = 0u; 167 168 private: 169 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); 170 }; 171 172 class FieldAccessCallingConvention { 173 public: 174 virtual Location GetObjectLocation() const = 0; 175 virtual Location GetFieldIndexLocation() const = 0; 176 virtual Location GetReturnLocation(DataType::Type type) const = 0; 177 virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0; 178 virtual Location GetFpuLocation(DataType::Type type) const = 0; ~FieldAccessCallingConvention()179 virtual ~FieldAccessCallingConvention() {} 180 181 protected: FieldAccessCallingConvention()182 FieldAccessCallingConvention() {} 183 184 private: 185 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention); 186 }; 187 188 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { 189 public: 190 // Compiles the graph to executable instructions. 
191 void Compile(); 192 static std::unique_ptr<CodeGenerator> Create(HGraph* graph, 193 const CompilerOptions& compiler_options, 194 OptimizingCompilerStats* stats = nullptr); 195 virtual ~CodeGenerator(); 196 197 // Get the graph. This is the outermost graph, never the graph of a method being inlined. GetGraph()198 HGraph* GetGraph() const { return graph_; } 199 200 HBasicBlock* GetNextBlockToEmit() const; 201 HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; 202 bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; 203 GetStackSlotOfParameter(HParameterValue * parameter)204 size_t GetStackSlotOfParameter(HParameterValue* parameter) const { 205 // Note that this follows the current calling convention. 206 return GetFrameSize() 207 + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method 208 + parameter->GetIndex() * kVRegSize; 209 } 210 211 virtual void Initialize() = 0; 212 virtual void Finalize(); 213 virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); 214 virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; 215 virtual void EmitThunkCode(const linker::LinkerPatch& patch, 216 /*out*/ ArenaVector<uint8_t>* code, 217 /*out*/ std::string* debug_name); 218 virtual void GenerateFrameEntry() = 0; 219 virtual void GenerateFrameExit() = 0; 220 virtual void Bind(HBasicBlock* block) = 0; 221 virtual void MoveConstant(Location destination, int32_t value) = 0; 222 virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0; 223 virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; 224 225 virtual Assembler* GetAssembler() = 0; 226 virtual const Assembler& GetAssembler() const = 0; 227 virtual size_t GetWordSize() const = 0; 228 229 // Returns whether the target supports predicated SIMD instructions. 
SupportsPredicatedSIMD()230 virtual bool SupportsPredicatedSIMD() const { return false; } 231 232 // Get FP register width in bytes for spilling/restoring in the slow paths. 233 // 234 // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers 235 // alias and live SIMD registers are forced to be spilled in full size in the slow paths. GetSlowPathFPWidth()236 virtual size_t GetSlowPathFPWidth() const { 237 // Default implementation. 238 return GetCalleePreservedFPWidth(); 239 } 240 241 // Get FP register width required to be preserved by the target ABI. 242 virtual size_t GetCalleePreservedFPWidth() const = 0; 243 244 // Get the size of the target SIMD register in bytes. 245 virtual size_t GetSIMDRegisterWidth() const = 0; 246 virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; 247 void InitializeCodeGeneration(size_t number_of_spill_slots, 248 size_t maximum_safepoint_spill_size, 249 size_t number_of_out_slots, 250 const ArenaVector<HBasicBlock*>& block_order); 251 // Backends can override this as necessary. For most, no special alignment is required. 
GetPreferredSlotsAlignment()252 virtual uint32_t GetPreferredSlotsAlignment() const { return 1; } 253 GetFrameSize()254 uint32_t GetFrameSize() const { return frame_size_; } SetFrameSize(uint32_t size)255 void SetFrameSize(uint32_t size) { frame_size_ = size; } GetMaximumFrameSize()256 uint32_t GetMaximumFrameSize() const { 257 return GetStackOverflowReservedBytes(GetInstructionSet()); 258 } 259 GetCoreSpillMask()260 uint32_t GetCoreSpillMask() const { return core_spill_mask_; } GetFpuSpillMask()261 uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } 262 GetNumberOfCoreRegisters()263 size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } GetNumberOfFloatingPointRegisters()264 size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } 265 virtual void SetupBlockedRegisters() const = 0; 266 ComputeSpillMask()267 virtual void ComputeSpillMask() { 268 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; 269 DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; 270 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; 271 } 272 273 virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; 274 virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; 275 virtual InstructionSet GetInstructionSet() const = 0; 276 277 // Saves the register in the stack. Returns the size taken on stack. 278 virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 279 // Restores the register from the stack. Returns the size taken on stack. 
280 virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; 281 282 virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 283 virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; 284 285 virtual bool NeedsTwoRegisters(DataType::Type type) const = 0; 286 // Returns whether we should split long moves in parallel moves. ShouldSplitLongMoves()287 virtual bool ShouldSplitLongMoves() const { return false; } 288 289 // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. IsImplementedIntrinsic(HInvoke * invoke)290 bool IsImplementedIntrinsic(HInvoke* invoke) const { 291 return invoke->IsIntrinsic() && 292 !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; 293 } 294 GetNumberOfCoreCalleeSaveRegisters()295 size_t GetNumberOfCoreCalleeSaveRegisters() const { 296 return POPCOUNT(core_callee_save_mask_); 297 } 298 GetNumberOfCoreCallerSaveRegisters()299 size_t GetNumberOfCoreCallerSaveRegisters() const { 300 DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); 301 return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); 302 } 303 IsCoreCalleeSaveRegister(int reg)304 bool IsCoreCalleeSaveRegister(int reg) const { 305 return (core_callee_save_mask_ & (1 << reg)) != 0; 306 } 307 IsFloatingPointCalleeSaveRegister(int reg)308 bool IsFloatingPointCalleeSaveRegister(int reg) const { 309 return (fpu_callee_save_mask_ & (1 << reg)) != 0; 310 } 311 GetSlowPathSpills(LocationSummary * locations,bool core_registers)312 uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { 313 DCHECK(locations->OnlyCallsOnSlowPath() || 314 (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && 315 !locations->HasCustomSlowPathCallingConvention())); 316 uint32_t live_registers = core_registers 317 ? 
locations->GetLiveRegisters()->GetCoreRegisters() 318 : locations->GetLiveRegisters()->GetFloatingPointRegisters(); 319 if (locations->HasCustomSlowPathCallingConvention()) { 320 // Save only the live registers that the custom calling convention wants us to save. 321 uint32_t caller_saves = core_registers 322 ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() 323 : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); 324 return live_registers & caller_saves; 325 } else { 326 // Default ABI, we need to spill non-callee-save live registers. 327 uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; 328 return live_registers & ~callee_saves; 329 } 330 } 331 GetNumberOfSlowPathSpills(LocationSummary * locations,bool core_registers)332 size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { 333 return POPCOUNT(GetSlowPathSpills(locations, core_registers)); 334 } 335 GetStackOffsetOfShouldDeoptimizeFlag()336 size_t GetStackOffsetOfShouldDeoptimizeFlag() const { 337 DCHECK(GetGraph()->HasShouldDeoptimizeFlag()); 338 DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize); 339 return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; 340 } 341 342 // For stack overflow checks and native-debug-info entries without dex register 343 // mapping i.e. start of basic block or at frame entry. 344 void RecordPcInfoForFrameOrBlockEntry(uint32_t dex_pc = 0); 345 346 // Record native to dex mapping for a suspend point. 347 // The native_pc is used from Assembler::CodePosition. 348 // 349 // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc 350 // for the instruction. If the exact native_pc is required it must be provided explicitly. 
351 void RecordPcInfo(HInstruction* instruction, 352 SlowPathCode* slow_path = nullptr, 353 bool native_debug_info = false); 354 355 // Record native to dex mapping for a suspend point. Required by runtime. 356 // Do not use directly. Use the method above. 357 void RecordPcInfo(HInstruction* instruction, 358 uint32_t dex_pc, 359 uint32_t native_pc, 360 SlowPathCode* slow_path = nullptr, 361 bool native_debug_info = false); 362 363 // Check whether we have already recorded mapping at this PC. 364 bool HasStackMapAtCurrentPc(); 365 366 // Record extra stack maps if we support native debugging. 367 // 368 // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions 369 // corresponding the dex PC. 370 void MaybeRecordNativeDebugInfoForBlockEntry(uint32_t dex_pc); 371 void MaybeRecordNativeDebugInfo(HInstruction* instruction, 372 uint32_t dex_pc, 373 SlowPathCode* slow_path = nullptr); 374 375 bool CanMoveNullCheckToUser(HNullCheck* null_check); 376 virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction); 377 LocationSummary* CreateThrowingSlowPathLocations( 378 HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); 379 void GenerateNullCheck(HNullCheck* null_check); 380 virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0; 381 virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0; 382 383 // Records a stack map which the runtime might use to set catch phi values 384 // during exception delivery. 385 // TODO: Replace with a catch-entering instruction that records the environment. 386 void RecordCatchBlockInfo(); 387 GetCompilerOptions()388 const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } 389 bool EmitReadBarrier() const; 390 bool EmitBakerReadBarrier() const; 391 bool EmitNonBakerReadBarrier() const; 392 ReadBarrierOption GetCompilerReadBarrierOption() const; 393 394 // Returns true if we should check the GC card for consistency purposes. 
395 bool ShouldCheckGCCard(DataType::Type type, 396 HInstruction* value, 397 WriteBarrierKind write_barrier_kind) const; 398 399 // Get the ScopedArenaAllocator used for codegen memory allocation. 400 ScopedArenaAllocator* GetScopedAllocator(); 401 402 void AddSlowPath(SlowPathCode* slow_path); 403 404 ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); 405 size_t GetNumberOfJitRoots() const; 406 407 // Fills the `literals` array with literals collected during code generation. 408 // Also emits literal patches. 409 void EmitJitRoots(uint8_t* code, 410 const uint8_t* roots_data, 411 /*out*/std::vector<Handle<mirror::Object>>* roots) 412 REQUIRES_SHARED(Locks::mutator_lock_); 413 IsLeafMethod()414 bool IsLeafMethod() const { 415 return is_leaf_; 416 } 417 MarkNotLeaf()418 void MarkNotLeaf() { 419 is_leaf_ = false; 420 requires_current_method_ = true; 421 } 422 NeedsSuspendCheckEntry()423 bool NeedsSuspendCheckEntry() const { 424 return needs_suspend_check_entry_; 425 } 426 MarkNeedsSuspendCheckEntry()427 void MarkNeedsSuspendCheckEntry() { 428 needs_suspend_check_entry_ = true; 429 } 430 SetRequiresCurrentMethod()431 void SetRequiresCurrentMethod() { 432 requires_current_method_ = true; 433 } 434 RequiresCurrentMethod()435 bool RequiresCurrentMethod() const { 436 return requires_current_method_; 437 } 438 439 // Clears the spill slots taken by loop phis in the `LocationSummary` of the 440 // suspend check. This is called when the code generator generates code 441 // for the suspend check at the back edge (instead of where the suspend check 442 // is, which is the loop entry). At this point, the spill slots for the phis 443 // have not been written to. 
444 void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, 445 HParallelMove* spills) const; 446 GetBlockedCoreRegisters()447 bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } GetBlockedFloatingPointRegisters()448 bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } 449 IsBlockedCoreRegister(size_t i)450 bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } IsBlockedFloatingPointRegister(size_t i)451 bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } 452 453 // Helper that returns the offset of the array's length field. 454 // Note: Besides the normal arrays, we also use the HArrayLength for 455 // accessing the String's `count` field in String intrinsics. 456 static uint32_t GetArrayLengthOffset(HArrayLength* array_length); 457 458 // Helper that returns the offset of the array's data. 459 // Note: Besides the normal arrays, we also use the HArrayGet for 460 // accessing the String's `value` field in String intrinsics. 461 static uint32_t GetArrayDataOffset(HArrayGet* array_get); 462 463 void EmitParallelMoves(Location from1, 464 Location to1, 465 DataType::Type type1, 466 Location from2, 467 Location to2, 468 DataType::Type type2); 469 InstanceOfNeedsReadBarrier(HInstanceOf * instance_of)470 bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { 471 // Used only for `kExactCheck`, `kAbstractClassCheck`, `kClassHierarchyCheck`, 472 // `kArrayObjectCheck` and `kInterfaceCheck`. 
473 DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || 474 instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || 475 instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || 476 instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck || 477 instance_of->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) 478 << instance_of->GetTypeCheckKind(); 479 // If the target class is in the boot or app image, it's non-moveable and it doesn't matter 480 // if we compare it with a from-space or to-space reference, the result is the same. 481 // It's OK to traverse a class hierarchy jumping between from-space and to-space. 482 return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInImage(); 483 } 484 ReadBarrierOptionForInstanceOf(HInstanceOf * instance_of)485 ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { 486 return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier; 487 } 488 IsTypeCheckSlowPathFatal(HCheckCast * check_cast)489 bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { 490 switch (check_cast->GetTypeCheckKind()) { 491 case TypeCheckKind::kExactCheck: 492 case TypeCheckKind::kAbstractClassCheck: 493 case TypeCheckKind::kClassHierarchyCheck: 494 case TypeCheckKind::kArrayObjectCheck: 495 case TypeCheckKind::kInterfaceCheck: { 496 bool needs_read_barrier = 497 EmitReadBarrier() && !check_cast->GetTargetClass()->IsInImage(); 498 // We do not emit read barriers for HCheckCast, so we can get false negatives 499 // and the slow path shall re-check and simply return if the cast is actually OK. 
500 return !needs_read_barrier; 501 } 502 case TypeCheckKind::kArrayCheck: 503 case TypeCheckKind::kUnresolvedCheck: 504 return false; 505 case TypeCheckKind::kBitstringCheck: 506 return true; 507 } 508 LOG(FATAL) << "Unreachable"; 509 UNREACHABLE(); 510 } 511 GetCheckCastCallKind(HCheckCast * check_cast)512 LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { 513 return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) 514 ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. 515 : LocationSummary::kCallOnSlowPath; 516 } 517 StoreNeedsWriteBarrier(DataType::Type type,HInstruction * value)518 static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) { 519 // Check that null value is not represented as an integer constant. 520 DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant()); 521 return type == DataType::Type::kReference && !value->IsNullConstant(); 522 } 523 524 // If we are compiling a graph with the WBE pass enabled, we want to honor the WriteBarrierKind 525 // set during the WBE pass. 526 bool StoreNeedsWriteBarrier(DataType::Type type, 527 HInstruction* value, 528 WriteBarrierKind write_barrier_kind) const; 529 530 // Performs checks pertaining to an InvokeRuntime call. 531 void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, 532 HInstruction* instruction, 533 SlowPathCode* slow_path); 534 535 // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. 536 static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, 537 SlowPathCode* slow_path); 538 AddAllocatedRegister(Location location)539 void AddAllocatedRegister(Location location) { 540 allocated_registers_.Add(location); 541 } 542 HasAllocatedRegister(bool is_core,int reg)543 bool HasAllocatedRegister(bool is_core, int reg) const { 544 return is_core 545 ? 
allocated_registers_.ContainsCoreRegister(reg) 546 : allocated_registers_.ContainsFloatingPointRegister(reg); 547 } 548 549 void AllocateLocations(HInstruction* instruction); 550 551 // Tells whether the stack frame of the compiled method is 552 // considered "empty", that is either actually having a size of zero, 553 // or just containing the saved return address register. HasEmptyFrame()554 bool HasEmptyFrame() const { 555 return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); 556 } 557 GetInt8ValueOf(HConstant * constant)558 static int8_t GetInt8ValueOf(HConstant* constant) { 559 DCHECK(constant->IsIntConstant()); 560 return constant->AsIntConstant()->GetValue(); 561 } 562 GetInt16ValueOf(HConstant * constant)563 static int16_t GetInt16ValueOf(HConstant* constant) { 564 DCHECK(constant->IsIntConstant()); 565 return constant->AsIntConstant()->GetValue(); 566 } 567 GetInt32ValueOf(HConstant * constant)568 static int32_t GetInt32ValueOf(HConstant* constant) { 569 if (constant->IsIntConstant()) { 570 return constant->AsIntConstant()->GetValue(); 571 } else if (constant->IsNullConstant()) { 572 return 0; 573 } else { 574 DCHECK(constant->IsFloatConstant()); 575 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 576 } 577 } 578 GetInt64ValueOf(HConstant * constant)579 static int64_t GetInt64ValueOf(HConstant* constant) { 580 if (constant->IsIntConstant()) { 581 return constant->AsIntConstant()->GetValue(); 582 } else if (constant->IsNullConstant()) { 583 return 0; 584 } else if (constant->IsFloatConstant()) { 585 return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); 586 } else if (constant->IsLongConstant()) { 587 return constant->AsLongConstant()->GetValue(); 588 } else { 589 DCHECK(constant->IsDoubleConstant()); 590 return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); 591 } 592 } 593 GetFirstRegisterSlotInSlowPath()594 size_t GetFirstRegisterSlotInSlowPath() const { 595 return 
first_register_slot_in_slow_path_; 596 } 597 FrameEntrySpillSize()598 uint32_t FrameEntrySpillSize() const { 599 return GetFpuSpillSize() + GetCoreSpillSize(); 600 } 601 602 virtual ParallelMoveResolver* GetMoveResolver() = 0; 603 604 static void CreateCommonInvokeLocationSummary( 605 HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); 606 607 template <typename CriticalNativeCallingConventionVisitor, 608 size_t kNativeStackAlignment, 609 size_t GetCriticalNativeDirectCallFrameSize(std::string_view shorty)> PrepareCriticalNativeCall(HInvokeStaticOrDirect * invoke)610 size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) { 611 DCHECK(!invoke->GetLocations()->Intrinsified()); 612 CriticalNativeCallingConventionVisitor calling_convention_visitor( 613 /*for_register_allocation=*/ false); 614 HParallelMove parallel_move(GetGraph()->GetAllocator()); 615 PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, ¶llel_move); 616 size_t out_frame_size = 617 RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment); 618 if (kIsDebugBuild) { 619 std::string_view shorty = GetCriticalNativeShorty(invoke); 620 CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty), out_frame_size); 621 } 622 if (out_frame_size != 0u) { 623 FinishCriticalNativeFrameSetup(out_frame_size, ¶llel_move); 624 } 625 return out_frame_size; 626 } 627 628 void GenerateInvokeStaticOrDirectRuntimeCall( 629 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); 630 631 void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); 632 633 void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr); 634 635 void GenerateInvokeCustomCall(HInvokeCustom* invoke); 636 637 void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out); 638 639 void CreateUnresolvedFieldLocationSummary( 640 HInstruction* field_access, 641 DataType::Type field_type, 642 const 
FieldAccessCallingConvention& calling_convention); 643 644 void GenerateUnresolvedFieldAccess( 645 HInstruction* field_access, 646 DataType::Type field_type, 647 uint32_t field_index, 648 const FieldAccessCallingConvention& calling_convention); 649 650 static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, 651 Location runtime_type_index_location, 652 Location runtime_return_location); 653 void GenerateLoadClassRuntimeCall(HLoadClass* cls); 654 655 static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, 656 Location runtime_handle_index_location, 657 Location runtime_return_location); 658 void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); 659 660 static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, 661 Location runtime_type_index_location, 662 Location runtime_return_location); 663 void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); 664 665 static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object) 666 REQUIRES_SHARED(Locks::mutator_lock_); 667 static uint32_t GetBootImageOffset(HLoadClass* load_class); 668 static uint32_t GetBootImageOffset(HLoadString* load_string); 669 static uint32_t GetBootImageOffset(HInvoke* invoke); 670 static uint32_t GetBootImageOffset(ClassRoot class_root); 671 static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke); 672 673 static LocationSummary* CreateSystemArrayCopyLocationSummary( 674 HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3); 675 SetDisassemblyInformation(DisassemblyInformation * info)676 void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } GetDisassemblyInformation()677 DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } 678 679 virtual void InvokeRuntime(QuickEntrypointEnum entrypoint, 680 HInstruction* instruction, 681 SlowPathCode* slow_path = nullptr) = 0; 682 683 // Check if 
the desired_string_load_kind is supported. If it is, return it, 684 // otherwise return a fall-back kind that should be used instead. 685 virtual HLoadString::LoadKind GetSupportedLoadStringKind( 686 HLoadString::LoadKind desired_string_load_kind) = 0; 687 688 // Check if the desired_class_load_kind is supported. If it is, return it, 689 // otherwise return a fall-back kind that should be used instead. 690 virtual HLoadClass::LoadKind GetSupportedLoadClassKind( 691 HLoadClass::LoadKind desired_class_load_kind) = 0; 692 GetLoadStringCallKind(HLoadString * load)693 LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { 694 switch (load->GetLoadKind()) { 695 case HLoadString::LoadKind::kBssEntry: 696 DCHECK(load->NeedsEnvironment()); 697 return LocationSummary::kCallOnSlowPath; 698 case HLoadString::LoadKind::kRuntimeCall: 699 DCHECK(load->NeedsEnvironment()); 700 return LocationSummary::kCallOnMainOnly; 701 case HLoadString::LoadKind::kJitTableAddress: 702 DCHECK(!load->NeedsEnvironment()); 703 return EmitReadBarrier() 704 ? LocationSummary::kCallOnSlowPath 705 : LocationSummary::kNoCall; 706 default: 707 DCHECK(!load->NeedsEnvironment()); 708 return LocationSummary::kNoCall; 709 } 710 } 711 712 // Check if the desired_dispatch_info is supported. If it is, return it, 713 // otherwise return a fall-back info that should be used instead. 714 virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 715 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 716 ArtMethod* method) = 0; 717 718 // Generate a call to a static or direct method. 719 virtual void GenerateStaticOrDirectCall( 720 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 721 // Generate a call to a virtual method. 722 virtual void GenerateVirtualCall( 723 HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; 724 725 // Copy the result of a call into the given target. 
726 virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0; 727 728 virtual void IncreaseFrame(size_t adjustment) = 0; 729 virtual void DecreaseFrame(size_t adjustment) = 0; 730 731 virtual void GenerateNop() = 0; 732 733 static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); 734 static ScaleFactor ScaleFactorForType(DataType::Type type); 735 GetCode()736 ArrayRef<const uint8_t> GetCode() const { 737 return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(), 738 GetAssembler().CodeSize()); 739 } 740 741 protected: 742 // Patch info used for recording locations of required linker patches and their targets, 743 // i.e. target method, string, type or code identified by their dex file and index, 744 // or boot image .data.img.rel.ro entries identified by the boot image offset. 745 template <typename LabelType> 746 struct PatchInfo { PatchInfoPatchInfo747 PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) 748 : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } 749 750 // Target dex file or null for boot image .data.img.rel.ro patches. 751 const DexFile* target_dex_file; 752 // Either the boot image offset (to write to .data.img.rel.ro) or string/type/method index. 753 uint32_t offset_or_index; 754 // Label for the instruction to patch. 
// Computes a 32-bit mask with bit `registers[i]` set for each of the first
// `length` entries of `registers`.
//
// `registers`: array of register numbers; each value is used directly as a
//              shift amount, so it must be smaller than 32.
// `length`:    number of entries to read from `registers`; 0 yields an empty mask.
//
// Returns the bitwise OR of the individual register bits.
template <typename RegType>
static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
  uint32_t mask = 0u;
  for (size_t i = 0; i != length; ++i) {
    // Shift an unsigned one: with a plain `1` the shift is done on a signed int,
    // and setting bit 31 would depend on signed-shift/conversion rules.
    mask |= uint32_t{1} << registers[i];
  }
  return mask;
}
796 return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) 797 || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); 798 } 799 CallPushesPC()800 bool CallPushesPC() const { 801 InstructionSet instruction_set = GetInstructionSet(); 802 return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; 803 } 804 805 // Arm64 has its own type for a label, so we need to templatize these methods 806 // to share the logic. 807 808 template <typename LabelType> CommonInitializeLabels()809 LabelType* CommonInitializeLabels() { 810 // We use raw array allocations instead of ArenaVector<> because Labels are 811 // non-constructible and non-movable and as such cannot be held in a vector. 812 size_t size = GetGraph()->GetBlocks().size(); 813 LabelType* labels = 814 GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator); 815 for (size_t i = 0; i != size; ++i) { 816 new(labels + i) LabelType(); 817 } 818 return labels; 819 } 820 821 template <typename LabelType> CommonGetLabelOf(LabelType * raw_pointer_to_labels_array,HBasicBlock * block)822 LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { 823 block = FirstNonEmptyBlock(block); 824 return raw_pointer_to_labels_array + block->GetBlockId(); 825 } 826 GetCurrentSlowPath()827 SlowPathCode* GetCurrentSlowPath() { 828 return current_slow_path_; 829 } 830 831 StackMapStream* GetStackMapStream(); 832 GetCodeGenerationData()833 CodeGenerationData* GetCodeGenerationData() { 834 return code_generation_data_.get(); 835 } 836 837 void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); 838 uint64_t GetJitStringRootIndex(StringReference string_reference); 839 void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); 840 uint64_t GetJitClassRootIndex(TypeReference type_reference); 841 void 
ReserveJitMethodTypeRoot(ProtoReference proto_reference, 842 Handle<mirror::MethodType> method_type); 843 uint64_t GetJitMethodTypeRootIndex(ProtoReference proto_reference); 844 845 // Emit the patches associated with JIT roots. Only applies to JIT compiled code. 846 virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data); 847 848 // Frame size required for this method. 849 uint32_t frame_size_; 850 uint32_t core_spill_mask_; 851 uint32_t fpu_spill_mask_; 852 uint32_t first_register_slot_in_slow_path_; 853 854 // Registers that were allocated during linear scan. 855 RegisterSet allocated_registers_; 856 857 // Arrays used when doing register allocation to know which 858 // registers we can allocate. `SetupBlockedRegisters` updates the 859 // arrays. 860 bool* const blocked_core_registers_; 861 bool* const blocked_fpu_registers_; 862 size_t number_of_core_registers_; 863 size_t number_of_fpu_registers_; 864 size_t number_of_register_pairs_; 865 const uint32_t core_callee_save_mask_; 866 const uint32_t fpu_callee_save_mask_; 867 868 // The order to use for code generation. 
869 const ArenaVector<HBasicBlock*>* block_order_; 870 871 DisassemblyInformation* disasm_info_; 872 873 private: 874 void InitializeCodeGenerationData(); 875 size_t GetStackOffsetOfSavedRegister(size_t index); 876 void GenerateSlowPaths(); 877 void BlockIfInRegister(Location location, bool is_out = false) const; 878 void EmitEnvironment(HEnvironment* environment, 879 SlowPathCode* slow_path, 880 bool needs_vreg_info = true, 881 bool is_for_catch_handler = false, 882 bool innermost_environment = true); 883 void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler); 884 void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment); 885 886 static void PrepareCriticalNativeArgumentMoves( 887 HInvokeStaticOrDirect* invoke, 888 /*inout*/InvokeDexCallingConventionVisitor* visitor, 889 /*out*/HParallelMove* parallel_move); 890 891 void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move); 892 893 static std::string_view GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke); 894 895 OptimizingCompilerStats* stats_; 896 897 HGraph* const graph_; 898 const CompilerOptions& compiler_options_; 899 900 // The current slow-path that we're generating code for. 901 SlowPathCode* current_slow_path_; 902 903 // The current block index in `block_order_` of the block 904 // we are generating code for. 905 size_t current_block_index_; 906 907 // Whether the method is a leaf method. 908 bool is_leaf_; 909 910 // Whether the method has to emit a SuspendCheck at entry. 911 bool needs_suspend_check_entry_; 912 913 // Whether an instruction in the graph accesses the current method. 914 // TODO: Rename: this actually indicates that some instruction in the method 915 // needs the environment including a valid stack frame. 
916 bool requires_current_method_; 917 918 // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the 919 // ArenaStack memory allocated in previous passes instead of adding to the memory 920 // held by the ArenaAllocator. This ScopedArenaAllocator is created in 921 // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. 922 std::unique_ptr<CodeGenerationData> code_generation_data_; 923 924 // Which intrinsics we don't have handcrafted code for. 925 art::ArrayRef<const bool> unimplemented_intrinsics_; 926 927 friend class OptimizingCFITest; 928 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); 929 ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); 930 931 DISALLOW_COPY_AND_ASSIGN(CodeGenerator); 932 }; 933 934 template <typename C, typename F> 935 class CallingConvention { 936 public: CallingConvention(const C * registers,size_t number_of_registers,const F * fpu_registers,size_t number_of_fpu_registers,PointerSize pointer_size)937 CallingConvention(const C* registers, 938 size_t number_of_registers, 939 const F* fpu_registers, 940 size_t number_of_fpu_registers, 941 PointerSize pointer_size) 942 : registers_(registers), 943 number_of_registers_(number_of_registers), 944 fpu_registers_(fpu_registers), 945 number_of_fpu_registers_(number_of_fpu_registers), 946 pointer_size_(pointer_size) {} 947 GetNumberOfRegisters()948 size_t GetNumberOfRegisters() const { return number_of_registers_; } GetNumberOfFpuRegisters()949 size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } 950 GetRegisterAt(size_t index)951 C GetRegisterAt(size_t index) const { 952 DCHECK_LT(index, number_of_registers_); 953 return registers_[index]; 954 } 955 GetFpuRegisterAt(size_t index)956 F GetFpuRegisterAt(size_t index) const { 957 DCHECK_LT(index, number_of_fpu_registers_); 958 return fpu_registers_[index]; 959 } 960 GetStackOffsetOf(size_t index)961 size_t GetStackOffsetOf(size_t index) const { 962 // We still reserve the 
space for parameters passed by registers. 963 // Add space for the method pointer. 964 return static_cast<size_t>(pointer_size_) + index * kVRegSize; 965 } 966 967 private: 968 const C* registers_; 969 const size_t number_of_registers_; 970 const F* fpu_registers_; 971 const size_t number_of_fpu_registers_; 972 const PointerSize pointer_size_; 973 974 DISALLOW_COPY_AND_ASSIGN(CallingConvention); 975 }; 976 977 /** 978 * A templated class SlowPathGenerator with a templated method NewSlowPath() 979 * that can be used by any code generator to share equivalent slow-paths with 980 * the objective of reducing generated code size. 981 * 982 * InstructionType: instruction that requires SlowPathCodeType 983 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) 984 */ 985 template <typename InstructionType> 986 class SlowPathGenerator { 987 static_assert(std::is_base_of<HInstruction, InstructionType>::value, 988 "InstructionType is not a subclass of art::HInstruction"); 989 990 public: SlowPathGenerator(HGraph * graph,CodeGenerator * codegen)991 SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) 992 : graph_(graph), 993 codegen_(codegen), 994 slow_path_map_(std::less<uint32_t>(), 995 graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {} 996 997 // Creates and adds a new slow-path, if needed, or returns existing one otherwise. 998 // Templating the method (rather than the whole class) on the slow-path type enables 999 // keeping this code at a generic, non architecture-specific place. 1000 // 1001 // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. 1002 // To relax this requirement, we would need some RTTI on the stored slow-paths, 1003 // or template the class as a whole on SlowPathType. 
1004 template <typename SlowPathCodeType> NewSlowPath(InstructionType * instruction)1005 SlowPathCodeType* NewSlowPath(InstructionType* instruction) { 1006 static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, 1007 "SlowPathCodeType is not a subclass of art::SlowPathCode"); 1008 static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, 1009 "SlowPathCodeType is not constructible from InstructionType*"); 1010 // Iterate over potential candidates for sharing. Currently, only same-typed 1011 // slow-paths with exactly the same dex-pc are viable candidates. 1012 // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? 1013 const uint32_t dex_pc = instruction->GetDexPc(); 1014 auto iter = slow_path_map_.find(dex_pc); 1015 if (iter != slow_path_map_.end()) { 1016 const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; 1017 for (const auto& it : candidates) { 1018 InstructionType* other_instruction = it.first; 1019 SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); 1020 // Determine if the instructions allow for slow-path sharing. 1021 if (HaveSameLiveRegisters(instruction, other_instruction) && 1022 HaveSameStackMap(instruction, other_instruction)) { 1023 // Can share: reuse existing one. 1024 return other_slow_path; 1025 } 1026 } 1027 } else { 1028 // First time this dex-pc is seen. 1029 iter = slow_path_map_.Put(dex_pc, 1030 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}}); 1031 } 1032 // Cannot share: create and add new slow-path for this particular dex-pc. 1033 SlowPathCodeType* slow_path = 1034 new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction); 1035 iter->second.emplace_back(std::make_pair(instruction, slow_path)); 1036 codegen_->AddSlowPath(slow_path); 1037 return slow_path; 1038 } 1039 1040 private: 1041 // Tests if both instructions have same set of live physical registers. 
This ensures 1042 // the slow-path has exactly the same preamble on saving these registers to stack. HaveSameLiveRegisters(const InstructionType * i1,const InstructionType * i2)1043 bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { 1044 const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); 1045 const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); 1046 RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); 1047 RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); 1048 return (((live1->GetCoreRegisters() & core_spill) == 1049 (live2->GetCoreRegisters() & core_spill)) && 1050 ((live1->GetFloatingPointRegisters() & fpu_spill) == 1051 (live2->GetFloatingPointRegisters() & fpu_spill))); 1052 } 1053 1054 // Tests if both instructions have the same stack map. This ensures the interpreter 1055 // will find exactly the same dex-registers at the same entries. HaveSameStackMap(const InstructionType * i1,const InstructionType * i2)1056 bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { 1057 DCHECK(i1->HasEnvironment()); 1058 DCHECK(i2->HasEnvironment()); 1059 // We conservatively test if the two instructions find exactly the same instructions 1060 // and location in each dex-register. This guarantees they will have the same stack map. 1061 HEnvironment* e1 = i1->GetEnvironment(); 1062 HEnvironment* e2 = i2->GetEnvironment(); 1063 if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { 1064 return false; 1065 } 1066 for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { 1067 if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || 1068 !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { 1069 return false; 1070 } 1071 } 1072 return true; 1073 } 1074 1075 HGraph* const graph_; 1076 CodeGenerator* const codegen_; 1077 1078 // Map from dex-pc to vector of already existing instruction/slow-path pairs. 
1079 ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; 1080 1081 DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); 1082 }; 1083 1084 class InstructionCodeGenerator : public HGraphVisitor { 1085 public: InstructionCodeGenerator(HGraph * graph,CodeGenerator * codegen)1086 InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) 1087 : HGraphVisitor(graph), 1088 deopt_slow_paths_(graph, codegen) {} 1089 1090 protected: 1091 // Add slow-path generator for each instruction/slow-path combination that desires sharing. 1092 // TODO: under current regime, only deopt sharing make sense; extend later. 1093 SlowPathGenerator<HDeoptimize> deopt_slow_paths_; 1094 }; 1095 1096 } // namespace art 1097 1098 #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ 1099