/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "stack_map_stream.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
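
// Illustrative note (not part of the original header): the double encodings above
// are the raw IEEE-754 bit patterns of the corresponding values, e.g. a sketch of
// the relationship, using the same bit_cast<Dest, Source> helper used below:
//
//   DCHECK_EQ(bit_cast<double, int64_t>(k2Pow32EncodingForDouble), 4294967296.0);  // 2^32
//   DCHECK_EQ(bit_cast<double, int64_t>(k2Pow31EncodingForDouble), 2147483648.0);  // 2^31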

class Assembler;
class CodeGenerator;
class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
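
// Illustrative sketch (not part of the original header): the rough shape of a
// backend-defined slow path. The class name and the EmitNativeCode body are
// assumptions; a real backend binds GetEntryLabel() and emits architecture-specific
// code through its assembler before calling the runtime.
//
//   class NullCheckSlowPathSketch : public SlowPathCode {
//    public:
//     explicit NullCheckSlowPathSketch(HNullCheck* instruction) : SlowPathCode(instruction) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Bind GetEntryLabel() here, then call the runtime to throw. A fatal
//       // slow path never branches back to GetExitLabel().
//       codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this);
//     }
//
//     bool IsFatal() const OVERRIDE { return true; }
//     const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathSketch"; }
//   };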

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
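
  // Illustrative note (not part of the original header): a worked instance of the
  // formula above, assuming a 32-bit target (pointer size 4, kVRegSize == 4) and a
  // 64-byte frame. The offset skips the whole callee frame plus the method-pointer
  // slot before indexing into the caller-provided vregs:
  //
  //   GetStackSlotOfParameter(parameter with index 2)
  //       == 64 /* frame size */ + 4 /* method pointer */ + 2 * 4 /* vreg index */ == 76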

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fpu_registers,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
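
  // Illustrative sketch (not part of the original header): on a 32-bit backend a
  // 64-bit value occupies a register pair, so an override would typically look like
  // the following (an assumption; the exact set depends on the architecture):
  //
  //   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
  //     return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
  //   }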

  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
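
  // Illustrative note (not part of the original header): on x86/x86-64 the call
  // instruction pushes the return PC, so even a method that spills nothing has a
  // frame of GetWordSize() bytes, and that frame still counts as "empty":
  //
  //   x86, frame_size_ == GetWordSize() (just the pushed PC)  ->  HasEmptyFrame() == true
  //   ARM, frame_size_ == 0                                   ->  HasEmptyFrame() == true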

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the address of the method or the code pointer or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  // String patch info used for recording locations of required linker patches and
  // target strings. The actual string address can be absolute or PC-relative.
  template <typename LabelType>
  struct StringPatchInfo {
    StringPatchInfo(const DexFile& df, uint32_t index)
        : dex_file(df), string_index(index), label() { }

    const DexFile& dex_file;
    uint32_t string_index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }
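
  // Illustrative note (not part of the original header): a worked instance of the
  // spill bookkeeping above, assuming a 32-bit target with GetWordSize() == 4,
  // 8-byte floating-point spill slots, and a 96-byte frame:
  //
  //   core_spill_mask_ with 2 bits set  ->  GetCoreSpillSize() == 2 * 4 == 8
  //   fpu_spill_mask_  with 2 bits set  ->  GetFpuSpillSize()  == 2 * 8 == 16
  //   FrameEntrySpillSize() == 24, so GetFpuSpillStart() == 96 - 24 == 72,
  //   i.e. the frame-entry spills sit at the top of the frame.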

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
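
// Illustrative note (not part of the original header): a worked instance of
// GetStackOffsetOf(), assuming a 32-bit target (pointer_size_ == 4). Stack slots
// for register-passed arguments are still reserved, so the offsets are purely
// index-based after the method-pointer slot:
//
//   GetStackOffsetOf(0) == 4 + 0 * kVRegSize == 4
//   GetStackOffsetOf(3) == 4 + 3 * kVRegSize == 16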

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType: instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path if needed, or returns the existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }
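
  // Illustrative usage sketch (not part of the original header): an arch-specific
  // instruction visitor would do something like the following; the slow-path class
  // name here is an assumption standing in for a real backend's type.
  //
  //   SlowPathGenerator<HDeoptimize> deopt_slow_paths_(graph, codegen);
  //   ...
  //   SlowPathCode* slow_path =
  //       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathSketch>(deoptimize);
  //   // Equivalent deoptimizations at the same dex-pc reuse the same slow path.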

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_