1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_ 18 #define ART_COMPILER_OPTIMIZING_INLINER_H_ 19 20 #include "dex/dex_file_types.h" 21 #include "dex/invoke_type.h" 22 #include "jit/profiling_info.h" 23 #include "optimization.h" 24 #include "profile/profile_compilation_info.h" 25 26 namespace art { 27 28 class CodeGenerator; 29 class DexCompilationUnit; 30 class HGraph; 31 class HInvoke; 32 class OptimizingCompilerStats; 33 34 class HInliner : public HOptimization { 35 public: 36 HInliner(HGraph* outer_graph, 37 HGraph* outermost_graph, 38 CodeGenerator* codegen, 39 const DexCompilationUnit& outer_compilation_unit, 40 const DexCompilationUnit& caller_compilation_unit, 41 OptimizingCompilerStats* stats, 42 size_t total_number_of_dex_registers, 43 size_t total_number_of_instructions, 44 HInliner* parent, 45 size_t depth = 0, 46 const char* name = kInlinerPassName) HOptimization(outer_graph,name,stats)47 : HOptimization(outer_graph, name, stats), 48 outermost_graph_(outermost_graph), 49 outer_compilation_unit_(outer_compilation_unit), 50 caller_compilation_unit_(caller_compilation_unit), 51 codegen_(codegen), 52 total_number_of_dex_registers_(total_number_of_dex_registers), 53 total_number_of_instructions_(total_number_of_instructions), 54 parent_(parent), 55 depth_(depth), 56 inlining_budget_(0), 57 inline_stats_(nullptr) {} 58 59 bool Run() override; 60 61 static constexpr const char* kInlinerPassName = "inliner"; 62 63 private: 64 enum InlineCacheType { 65 kInlineCacheNoData = 0, 66 kInlineCacheUninitialized = 1, 67 kInlineCacheMonomorphic = 2, 68 kInlineCachePolymorphic = 3, 69 kInlineCacheMegamorphic = 4, 70 kInlineCacheMissingTypes = 5 71 }; 72 73 // We set `did_set_always_throws` as true if we analyzed `invoke_instruction` and it always 74 // throws. 75 bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws); 76 77 // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether 78 // reference type propagation can run after the inlining. If the inlining is successful, this 79 // method will replace and remove the `invoke_instruction`. 80 bool TryInlineAndReplace(HInvoke* invoke_instruction, 81 ArtMethod* resolved_method, 82 ReferenceTypeInfo receiver_type, 83 bool do_rtp) 84 REQUIRES_SHARED(Locks::mutator_lock_); 85 86 bool TryBuildAndInline(HInvoke* invoke_instruction, 87 ArtMethod* resolved_method, 88 ReferenceTypeInfo receiver_type, 89 HInstruction** return_replacement) 90 REQUIRES_SHARED(Locks::mutator_lock_); 91 92 bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, 93 ArtMethod* resolved_method, 94 ReferenceTypeInfo receiver_type, 95 HInstruction** return_replacement) 96 REQUIRES_SHARED(Locks::mutator_lock_); 97 98 // Substitutes parameters in the callee graph with their values from the caller. 99 void SubstituteArguments(HGraph* callee_graph, 100 HInvoke* invoke_instruction, 101 ReferenceTypeInfo receiver_type, 102 const DexCompilationUnit& dex_compilation_unit) 103 REQUIRES_SHARED(Locks::mutator_lock_); 104 105 // Run simple optimizations on `callee_graph`. 106 void RunOptimizations(HGraph* callee_graph, 107 const dex::CodeItem* code_item, 108 const DexCompilationUnit& dex_compilation_unit) 109 REQUIRES_SHARED(Locks::mutator_lock_); 110 111 // Try to recognize known simple patterns and replace invoke call with appropriate instructions. 112 bool TryPatternSubstitution(HInvoke* invoke_instruction, 113 ArtMethod* method, 114 HInstruction** return_replacement) 115 REQUIRES_SHARED(Locks::mutator_lock_); 116 117 // Returns whether inlining is allowed based on ART semantics. 118 bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const 119 REQUIRES_SHARED(Locks::mutator_lock_); 120 121 122 // Returns whether ART supports inlining this method. 123 // 124 // Some methods are not supported because they have features for which inlining 125 // is not implemented. For example, we do not currently support inlining throw 126 // instructions into a try block. 127 bool IsInliningSupported(const HInvoke* invoke_instruction, 128 art::ArtMethod* method, 129 const CodeItemDataAccessor& accessor) const 130 REQUIRES_SHARED(Locks::mutator_lock_); 131 132 // Returns whether the inlining budget allows inlining method. 133 // 134 // For example, this checks whether the function has grown too large and 135 // inlining should be prevented. 136 bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const 137 REQUIRES_SHARED(Locks::mutator_lock_); 138 139 // Inspects the body of a method (callee_graph) and returns whether it can be 140 // inlined. 141 // 142 // This checks for instructions and constructs that we do not support 143 // inlining, such as inlining a throw instruction into a try block. 144 bool CanInlineBody(const HGraph* callee_graph, 145 const HBasicBlock* target_block, 146 size_t* out_number_of_instructions) const 147 REQUIRES_SHARED(Locks::mutator_lock_); 148 149 // Create a new HInstanceFieldGet. 150 HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index, 151 ArtMethod* referrer, 152 HInstruction* obj); 153 // Create a new HInstanceFieldSet. 154 HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index, 155 ArtMethod* referrer, 156 HInstruction* obj, 157 HInstruction* value, 158 bool* is_final = nullptr); 159 160 // Try inlining the invoke instruction using inline caches. 161 bool TryInlineFromInlineCache(HInvoke* invoke_instruction) 162 REQUIRES_SHARED(Locks::mutator_lock_); 163 164 // Try inlining the invoke instruction using CHA. 165 bool TryInlineFromCHA(HInvoke* invoke_instruction) 166 REQUIRES_SHARED(Locks::mutator_lock_); 167 168 // When we fail inlining `invoke_instruction`, we will try to devirtualize the 169 // call. 170 bool TryDevirtualize(HInvoke* invoke_instruction, 171 ArtMethod* method, 172 HInvoke** replacement) 173 REQUIRES_SHARED(Locks::mutator_lock_); 174 175 // Try getting the inline cache from JIT code cache. 176 // Return true if the inline cache was successfully allocated and the 177 // invoke info was found in the profile info. 178 InlineCacheType GetInlineCacheJIT( 179 HInvoke* invoke_instruction, 180 /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) 181 REQUIRES_SHARED(Locks::mutator_lock_); 182 183 // Try getting the inline cache from AOT offline profile. 184 // Return true if the inline cache was successfully allocated and the 185 // invoke info was found in the profile info. 186 InlineCacheType GetInlineCacheAOT( 187 HInvoke* invoke_instruction, 188 /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) 189 REQUIRES_SHARED(Locks::mutator_lock_); 190 191 // Compute the inline cache type. 192 static InlineCacheType GetInlineCacheType( 193 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 194 REQUIRES_SHARED(Locks::mutator_lock_); 195 196 // Try to inline the target of a monomorphic call. If successful, the code 197 // in the graph will look like: 198 // if (receiver.getClass() != ic.GetMonomorphicType()) deopt 199 // ... // inlined code 200 bool TryInlineMonomorphicCall(HInvoke* invoke_instruction, 201 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 202 REQUIRES_SHARED(Locks::mutator_lock_); 203 204 // Try to inline targets of a polymorphic call. 205 bool TryInlinePolymorphicCall(HInvoke* invoke_instruction, 206 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 207 REQUIRES_SHARED(Locks::mutator_lock_); 208 209 bool TryInlinePolymorphicCallToSameTarget( 210 HInvoke* invoke_instruction, 211 const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) 212 REQUIRES_SHARED(Locks::mutator_lock_); 213 214 // Returns whether or not we should use only polymorphic inlining with no deoptimizations. 215 bool UseOnlyPolymorphicInliningWithNoDeopt(); 216 217 // Try CHA-based devirtualization to change virtual method calls into 218 // direct calls. 219 // Returns the actual method that resolved_method can be devirtualized to. 220 ArtMethod* FindMethodFromCHA(ArtMethod* resolved_method) 221 REQUIRES_SHARED(Locks::mutator_lock_); 222 223 // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a 224 // should_deoptimize flag and if it's true, does deoptimization. 225 void AddCHAGuard(HInstruction* invoke_instruction, 226 uint32_t dex_pc, 227 HInstruction* cursor, 228 HBasicBlock* bb_cursor); 229 230 HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, 231 HInstruction* receiver, 232 uint32_t dex_pc) const 233 REQUIRES_SHARED(Locks::mutator_lock_); 234 235 void MaybeRunReferenceTypePropagation(HInstruction* replacement, 236 HInvoke* invoke_instruction) 237 REQUIRES_SHARED(Locks::mutator_lock_); 238 239 void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) 240 REQUIRES_SHARED(Locks::mutator_lock_); 241 242 bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) 243 REQUIRES_SHARED(Locks::mutator_lock_); 244 245 bool ReturnTypeMoreSpecific(HInstruction* return_replacement, HInvoke* invoke_instruction) 246 REQUIRES_SHARED(Locks::mutator_lock_); 247 248 // Add a type guard on the given `receiver`. This will add to the graph: 249 // i0 = HFieldGet(receiver, klass) 250 // i1 = HLoadClass(class_index, is_referrer) 251 // i2 = HNotEqual(i0, i1) 252 // 253 // And if `with_deoptimization` is true: 254 // HDeoptimize(i2) 255 // 256 // The method returns the `HNotEqual`, that will be used for polymorphic inlining. 257 HInstruction* AddTypeGuard(HInstruction* receiver, 258 HInstruction* cursor, 259 HBasicBlock* bb_cursor, 260 dex::TypeIndex class_index, 261 Handle<mirror::Class> klass, 262 HInstruction* invoke_instruction, 263 bool with_deoptimization) 264 REQUIRES_SHARED(Locks::mutator_lock_); 265 266 /* 267 * Ad-hoc implementation for implementing a diamond pattern in the graph for 268 * polymorphic inlining: 269 * 1) `compare` becomes the input of the new `HIf`. 270 * 2) Everything up until `invoke_instruction` is in the then branch (could 271 * contain multiple blocks). 272 * 3) `invoke_instruction` is moved to the otherwise block. 273 * 4) If `return_replacement` is not null, the merge block will have 274 * a phi whose inputs are `return_replacement` and `invoke_instruction`. 275 * 276 * Before: 277 * Block1 278 * compare 279 * ... 280 * invoke_instruction 281 * 282 * After: 283 * Block1 284 * compare 285 * if 286 * / \ 287 * / \ 288 * Then block Otherwise block 289 * ... invoke_instruction 290 * \ / 291 * \ / 292 * Merge block 293 * phi(return_replacement, invoke_instruction) 294 */ 295 void CreateDiamondPatternForPolymorphicInline(HInstruction* compare, 296 HInstruction* return_replacement, 297 HInstruction* invoke_instruction); 298 299 // Update the inlining budget based on `total_number_of_instructions_`. 300 void UpdateInliningBudget(); 301 302 // Count the number of calls of `method` being inlined recursively. 303 size_t CountRecursiveCallsOf(ArtMethod* method) const; 304 305 // Pretty-print for spaces during logging. 306 std::string DepthString(int line) const; 307 308 HGraph* const outermost_graph_; 309 const DexCompilationUnit& outer_compilation_unit_; 310 const DexCompilationUnit& caller_compilation_unit_; 311 CodeGenerator* const codegen_; 312 const size_t total_number_of_dex_registers_; 313 size_t total_number_of_instructions_; 314 315 // The 'parent' inliner, that means the inlinigng optimization that requested 316 // `graph_` to be inlined. 317 const HInliner* const parent_; 318 const size_t depth_; 319 320 // The budget left for inlining, in number of instructions. 321 size_t inlining_budget_; 322 323 // Used to record stats about optimizations on the inlined graph. 324 // If the inlining is successful, these stats are merged to the caller graph's stats. 325 OptimizingCompilerStats* inline_stats_; 326 327 DISALLOW_COPY_AND_ASSIGN(HInliner); 328 }; 329 330 } // namespace art 331 332 #endif // ART_COMPILER_OPTIMIZING_INLINER_H_ 333