1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_ 18 #define ART_COMPILER_OPTIMIZING_INLINER_H_ 19 20 #include "dex/dex_file_types.h" 21 #include "dex/invoke_type.h" 22 #include "jit/profiling_info.h" 23 #include "optimization.h" 24 #include "profile/profile_compilation_info.h" 25 26 namespace art { 27 28 class CodeGenerator; 29 class DexCompilationUnit; 30 class HGraph; 31 class HInvoke; 32 class OptimizingCompilerStats; 33 34 class HInliner : public HOptimization { 35 public: 36 HInliner(HGraph* outer_graph, 37 HGraph* outermost_graph, 38 CodeGenerator* codegen, 39 const DexCompilationUnit& outer_compilation_unit, 40 const DexCompilationUnit& caller_compilation_unit, 41 OptimizingCompilerStats* stats, 42 size_t total_number_of_dex_registers, 43 size_t total_number_of_instructions, 44 HInliner* parent, 45 size_t depth = 0, 46 const char* name = kInlinerPassName) HOptimization(outer_graph,name,stats)47 : HOptimization(outer_graph, name, stats), 48 outermost_graph_(outermost_graph), 49 outer_compilation_unit_(outer_compilation_unit), 50 caller_compilation_unit_(caller_compilation_unit), 51 codegen_(codegen), 52 total_number_of_dex_registers_(total_number_of_dex_registers), 53 total_number_of_instructions_(total_number_of_instructions), 54 parent_(parent), 55 depth_(depth), 56 inlining_budget_(0), 57 inline_stats_(nullptr) {} 58 59 bool Run() override; 60 61 static constexpr const char* kInlinerPassName = "inliner"; 62 63 private: 64 enum InlineCacheType { 65 kInlineCacheNoData = 0, 66 kInlineCacheUninitialized = 1, 67 kInlineCacheMonomorphic = 2, 68 kInlineCachePolymorphic = 3, 69 kInlineCacheMegamorphic = 4, 70 kInlineCacheMissingTypes = 5 71 }; 72 73 bool TryInline(HInvoke* invoke_instruction); 74 75 // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether 76 // reference type propagation can run after the inlining. If the inlining is successful, this 77 // method will replace and remove the `invoke_instruction`. 78 bool TryInlineAndReplace(HInvoke* invoke_instruction, 79 ArtMethod* resolved_method, 80 ReferenceTypeInfo receiver_type, 81 bool do_rtp) 82 REQUIRES_SHARED(Locks::mutator_lock_); 83 84 bool TryBuildAndInline(HInvoke* invoke_instruction, 85 ArtMethod* resolved_method, 86 ReferenceTypeInfo receiver_type, 87 HInstruction** return_replacement) 88 REQUIRES_SHARED(Locks::mutator_lock_); 89 90 bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, 91 ArtMethod* resolved_method, 92 ReferenceTypeInfo receiver_type, 93 HInstruction** return_replacement) 94 REQUIRES_SHARED(Locks::mutator_lock_); 95 96 // Substitutes parameters in the callee graph with their values from the caller. 97 void SubstituteArguments(HGraph* callee_graph, 98 HInvoke* invoke_instruction, 99 ReferenceTypeInfo receiver_type, 100 const DexCompilationUnit& dex_compilation_unit) 101 REQUIRES_SHARED(Locks::mutator_lock_); 102 103 // Run simple optimizations on `callee_graph`. 104 void RunOptimizations(HGraph* callee_graph, 105 const dex::CodeItem* code_item, 106 const DexCompilationUnit& dex_compilation_unit) 107 REQUIRES_SHARED(Locks::mutator_lock_); 108 109 // Try to recognize known simple patterns and replace invoke call with appropriate instructions. 110 bool TryPatternSubstitution(HInvoke* invoke_instruction, 111 ArtMethod* method, 112 HInstruction** return_replacement) 113 REQUIRES_SHARED(Locks::mutator_lock_); 114 115 // Returns whether inlining is allowed based on ART semantics. 116 bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const 117 REQUIRES_SHARED(Locks::mutator_lock_); 118 119 120 // Returns whether ART supports inlining this method. 121 // 122 // Some methods are not supported because they have features for which inlining 123 // is not implemented. For example, we do not currently support inlining throw 124 // instructions into a try block. 125 bool IsInliningSupported(const HInvoke* invoke_instruction, 126 art::ArtMethod* method, 127 const CodeItemDataAccessor& accessor) const 128 REQUIRES_SHARED(Locks::mutator_lock_); 129 130 // Returns whether the inlining budget allows inlining method. 131 // 132 // For example, this checks whether the function has grown too large and 133 // inlining should be prevented. 134 bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const 135 REQUIRES_SHARED(Locks::mutator_lock_); 136 137 // Inspects the body of a method (callee_graph) and returns whether it can be 138 // inlined. 139 // 140 // This checks for instructions and constructs that we do not support 141 // inlining, such as inlining a throw instruction into a try block. 142 bool CanInlineBody(const HGraph* callee_graph, 143 const HBasicBlock* target_block, 144 size_t* out_number_of_instructions) const 145 REQUIRES_SHARED(Locks::mutator_lock_); 146 147 // Create a new HInstanceFieldGet. 148 HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index, 149 ArtMethod* referrer, 150 HInstruction* obj); 151 // Create a new HInstanceFieldSet. 152 HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index, 153 ArtMethod* referrer, 154 HInstruction* obj, 155 HInstruction* value, 156 bool* is_final = nullptr); 157 158 // Try inlining the invoke instruction using inline caches. 159 bool TryInlineFromInlineCache(HInvoke* invoke_instruction) 160 REQUIRES_SHARED(Locks::mutator_lock_); 161 162 // Try inlining the invoke instruction using CHA. 163 bool TryInlineFromCHA(HInvoke* invoke_instruction) 164 REQUIRES_SHARED(Locks::mutator_lock_); 165 166 // When we fail inlining `invoke_instruction`, we will try to devirtualize the 167 // call. 168 bool TryDevirtualize(HInvoke* invoke_instruction, 169 ArtMethod* method, 170 HInvoke** replacement) 171 REQUIRES_SHARED(Locks::mutator_lock_); 172 173 // Try getting the inline cache from JIT code cache. 174 // Return true if the inline cache was successfully allocated and the 175 // invoke info was found in the profile info. 176 InlineCacheType GetInlineCacheJIT( 177 HInvoke* invoke_instruction, 178 /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) 179 REQUIRES_SHARED(Locks::mutator_lock_); 180 181 // Try getting the inline cache from AOT offline profile. 182 // Return true if the inline cache was successfully allocated and the 183 // invoke info was found in the profile info. 184 InlineCacheType GetInlineCacheAOT( 185 HInvoke* invoke_instruction, 186 /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) 187 REQUIRES_SHARED(Locks::mutator_lock_); 188 189 // Compute the inline cache type. 190 static InlineCacheType GetInlineCacheType( 191 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 192 REQUIRES_SHARED(Locks::mutator_lock_); 193 194 // Try to inline the target of a monomorphic call. If successful, the code 195 // in the graph will look like: 196 // if (receiver.getClass() != ic.GetMonomorphicType()) deopt 197 // ... // inlined code 198 bool TryInlineMonomorphicCall(HInvoke* invoke_instruction, 199 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 200 REQUIRES_SHARED(Locks::mutator_lock_); 201 202 // Try to inline targets of a polymorphic call. 203 bool TryInlinePolymorphicCall(HInvoke* invoke_instruction, 204 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 205 REQUIRES_SHARED(Locks::mutator_lock_); 206 207 bool TryInlinePolymorphicCallToSameTarget( 208 HInvoke* invoke_instruction, 209 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 210 REQUIRES_SHARED(Locks::mutator_lock_); 211 212 // Returns whether or not we should use only polymorphic inlining with no deoptimizations. 213 bool UseOnlyPolymorphicInliningWithNoDeopt(); 214 215 // Try CHA-based devirtualization to change virtual method calls into 216 // direct calls. 217 // Returns the actual method that resolved_method can be devirtualized to. 218 ArtMethod* FindMethodFromCHA(ArtMethod* resolved_method) 219 REQUIRES_SHARED(Locks::mutator_lock_); 220 221 // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a 222 // should_deoptimize flag and if it's true, does deoptimization. 223 void AddCHAGuard(HInstruction* invoke_instruction, 224 uint32_t dex_pc, 225 HInstruction* cursor, 226 HBasicBlock* bb_cursor); 227 228 HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, 229 HInstruction* receiver, 230 uint32_t dex_pc) const 231 REQUIRES_SHARED(Locks::mutator_lock_); 232 233 void MaybeRunReferenceTypePropagation(HInstruction* replacement, 234 HInvoke* invoke_instruction) 235 REQUIRES_SHARED(Locks::mutator_lock_); 236 237 void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) 238 REQUIRES_SHARED(Locks::mutator_lock_); 239 240 bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) 241 REQUIRES_SHARED(Locks::mutator_lock_); 242 243 bool ReturnTypeMoreSpecific(HInstruction* return_replacement, HInvoke* invoke_instruction) 244 REQUIRES_SHARED(Locks::mutator_lock_); 245 246 // Add a type guard on the given `receiver`. This will add to the graph: 247 // i0 = HFieldGet(receiver, klass) 248 // i1 = HLoadClass(class_index, is_referrer) 249 // i2 = HNotEqual(i0, i1) 250 // 251 // And if `with_deoptimization` is true: 252 // HDeoptimize(i2) 253 // 254 // The method returns the `HNotEqual`, that will be used for polymorphic inlining. 255 HInstruction* AddTypeGuard(HInstruction* receiver, 256 HInstruction* cursor, 257 HBasicBlock* bb_cursor, 258 dex::TypeIndex class_index, 259 Handle<mirror::Class> klass, 260 HInstruction* invoke_instruction, 261 bool with_deoptimization) 262 REQUIRES_SHARED(Locks::mutator_lock_); 263 264 /* 265 * Ad-hoc implementation for implementing a diamond pattern in the graph for 266 * polymorphic inlining: 267 * 1) `compare` becomes the input of the new `HIf`. 268 * 2) Everything up until `invoke_instruction` is in the then branch (could 269 * contain multiple blocks). 270 * 3) `invoke_instruction` is moved to the otherwise block. 271 * 4) If `return_replacement` is not null, the merge block will have 272 * a phi whose inputs are `return_replacement` and `invoke_instruction`. 273 * 274 * Before: 275 * Block1 276 * compare 277 * ... 278 * invoke_instruction 279 * 280 * After: 281 * Block1 282 * compare 283 * if 284 * / \ 285 * / \ 286 * Then block Otherwise block 287 * ... invoke_instruction 288 * \ / 289 * \ / 290 * Merge block 291 * phi(return_replacement, invoke_instruction) 292 */ 293 void CreateDiamondPatternForPolymorphicInline(HInstruction* compare, 294 HInstruction* return_replacement, 295 HInstruction* invoke_instruction); 296 297 // Update the inlining budget based on `total_number_of_instructions_`. 298 void UpdateInliningBudget(); 299 300 // Count the number of calls of `method` being inlined recursively. 301 size_t CountRecursiveCallsOf(ArtMethod* method) const; 302 303 // Pretty-print for spaces during logging. 304 std::string DepthString(int line) const; 305 306 HGraph* const outermost_graph_; 307 const DexCompilationUnit& outer_compilation_unit_; 308 const DexCompilationUnit& caller_compilation_unit_; 309 CodeGenerator* const codegen_; 310 const size_t total_number_of_dex_registers_; 311 size_t total_number_of_instructions_; 312 313 // The 'parent' inliner, that means the inlinigng optimization that requested 314 // `graph_` to be inlined. 315 const HInliner* const parent_; 316 const size_t depth_; 317 318 // The budget left for inlining, in number of instructions. 319 size_t inlining_budget_; 320 321 // Used to record stats about optimizations on the inlined graph. 322 // If the inlining is successful, these stats are merged to the caller graph's stats. 323 OptimizingCompilerStats* inline_stats_; 324 325 DISALLOW_COPY_AND_ASSIGN(HInliner); 326 }; 327 328 } // namespace art 329 330 #endif // ART_COMPILER_OPTIMIZING_INLINER_H_ 331