/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_

#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profiling_info.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"

namespace art {

class CodeGenerator;
class DexCompilationUnit;
class HGraph;
class HInvoke;
class OptimizingCompilerStats;

class HInliner : public HOptimization {
 public:
  HInliner(HGraph* outer_graph,
           HGraph* outermost_graph,
           CodeGenerator* codegen,
           const DexCompilationUnit& outer_compilation_unit,
           const DexCompilationUnit& caller_compilation_unit,
           OptimizingCompilerStats* stats,
           size_t total_number_of_dex_registers,
           size_t total_number_of_instructions,
           HInliner* parent,
           size_t depth = 0,
           const char* name = kInlinerPassName)
      : HOptimization(outer_graph, name, stats),
        outermost_graph_(outermost_graph),
        outer_compilation_unit_(outer_compilation_unit),
        caller_compilation_unit_(caller_compilation_unit),
        codegen_(codegen),
        total_number_of_dex_registers_(total_number_of_dex_registers),
        total_number_of_instructions_(total_number_of_instructions),
        parent_(parent),
        depth_(depth),
        inlining_budget_(0),
        inline_stats_(nullptr) {}

  bool Run() override;

  static constexpr const char* kInlinerPassName = "inliner";

 private:
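  // Kind of inline cache data available for an invoke instruction.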
  enum InlineCacheType {
    kInlineCacheNoData = 0,
    kInlineCacheUninitialized = 1,
    kInlineCacheMonomorphic = 2,
    kInlineCachePolymorphic = 3,
    kInlineCacheMegamorphic = 4,
    kInlineCacheMissingTypes = 5
  };

  // Tries to inline `invoke_instruction`. Sets `did_set_always_throws` to true if we analyzed
  // `invoke_instruction` and it always throws.
  bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws);

  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
  // reference type propagation can run after the inlining. If the inlining is successful, this
  // method will replace and remove the `invoke_instruction`.
  bool TryInlineAndReplace(HInvoke* invoke_instruction,
                           ArtMethod* resolved_method,
                           ReferenceTypeInfo receiver_type,
                           bool do_rtp)
    REQUIRES_SHARED(Locks::mutator_lock_);

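  // Build a graph for `resolved_method` and try to inline it in place of
  // `invoke_instruction`; on success, the value replacing the invoke (if any) is
  // stored in `*return_replacement`.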
  bool TryBuildAndInline(HInvoke* invoke_instruction,
                         ArtMethod* resolved_method,
                         ReferenceTypeInfo receiver_type,
                         HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

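  // Helper for TryBuildAndInline(): builds the callee graph and performs the
  // actual inlining into the caller graph.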
  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
                               ReferenceTypeInfo receiver_type,
                               HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Substitutes parameters in the callee graph with their values from the caller.
  void SubstituteArguments(HGraph* callee_graph,
                           HInvoke* invoke_instruction,
                           ReferenceTypeInfo receiver_type,
                           const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Run simple optimizations on `callee_graph`.
  void RunOptimizations(HGraph* callee_graph,
                        const dex::CodeItem* code_item,
                        const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to recognize known simple patterns and replace the invoke call with appropriate instructions.
  bool TryPatternSubstitution(HInvoke* invoke_instruction,
                              ArtMethod* method,
                              HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether inlining is allowed based on ART semantics.
  bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether ART supports inlining this method.
  //
  // Some methods are not supported because they have features for which inlining
  // is not implemented. For example, we do not currently support inlining throw
  // instructions into a try block.
  bool IsInliningSupported(const HInvoke* invoke_instruction,
                           art::ArtMethod* method,
                           const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether the inlining budget allows inlining `method`.
  //
  // For example, this checks whether the function has grown too large and
  // inlining should be prevented.
  bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Inspects the body of a method (`callee_graph`) and returns whether it can be
  // inlined.
  //
  // This checks for instructions and constructs whose inlining we do not support,
  // such as inlining a throw instruction into a try block.
  bool CanInlineBody(const HGraph* callee_graph,
                     const HBasicBlock* target_block,
                     size_t* out_number_of_instructions) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Create a new HInstanceFieldGet.
  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj);
  // Create a new HInstanceFieldSet.
  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj,
                                            HInstruction* value,
                                            bool* is_final = nullptr);

  // Try inlining the invoke instruction using inline caches.
  bool TryInlineFromInlineCache(HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try inlining the invoke instruction using CHA.
  bool TryInlineFromCHA(HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // When we fail to inline `invoke_instruction`, we try to devirtualize the call.
  bool TryDevirtualize(HInvoke* invoke_instruction,
                       ArtMethod* method,
                       HInvoke** replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the JIT code cache.
  // `classes` is only filled if the inline cache was successfully allocated and
  // the invoke info was found in the profiling info.
  InlineCacheType GetInlineCacheJIT(
      HInvoke* invoke_instruction,
      /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the AOT offline profile.
  // `classes` is only filled if the inline cache was successfully allocated and
  // the invoke info was found in the profiling info.
  InlineCacheType GetInlineCacheAOT(
      HInvoke* invoke_instruction,
      /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Compute the inline cache type.
  static InlineCacheType GetInlineCacheType(
      const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline the target of a monomorphic call. If successful, the code
  // in the graph will look like:
  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
  // ... // inlined code
  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline targets of a polymorphic call.
  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

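  // Try to inline a polymorphic call whose inline cache entries all resolve to
  // the same target method.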
  bool TryInlinePolymorphicCallToSameTarget(
      HInvoke* invoke_instruction,
      const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether we should use only polymorphic inlining with no deoptimizations.
  bool UseOnlyPolymorphicInliningWithNoDeopt();

  // Try CHA-based devirtualization to change virtual method calls into
  // direct calls.
  // Returns the actual method that `resolved_method` can be devirtualized to.
  ArtMethod* FindMethodFromCHA(ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
  // should_deoptimize flag and deoptimizes if the flag is set.
  void AddCHAGuard(HInstruction* invoke_instruction,
                   uint32_t dex_pc,
                   HInstruction* cursor,
                   HBasicBlock* bb_cursor);

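  // Build an HInstanceFieldGet that loads the class of `receiver`.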
  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                           HInstruction* receiver,
                                           uint32_t dex_pc) const
    REQUIRES_SHARED(Locks::mutator_lock_);

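  // Run reference type propagation if needed after replacing `invoke_instruction`
  // with `replacement`.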
  void MaybeRunReferenceTypePropagation(HInstruction* replacement,
                                        HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

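  // Update the reference type of `return_replacement` based on the return type
  // of `resolved_method`.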
  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

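  // Returns whether the argument types at the call site are more specific than
  // the declared parameter types of `resolved_method`.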
  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

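  // Returns whether the type of `return_replacement` is more specific than the
  // declared return type of the invoked method.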
  bool ReturnTypeMoreSpecific(HInstruction* return_replacement, HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a type guard on the given `receiver`. This will add to the graph:
  // i0 = HFieldGet(receiver, klass)
  // i1 = HLoadClass(class_index, is_referrer)
  // i2 = HNotEqual(i0, i1)
  //
  // And if `with_deoptimization` is true:
  // HDeoptimize(i2)
  //
  // The method returns the `HNotEqual`, which will be used for polymorphic inlining.
  HInstruction* AddTypeGuard(HInstruction* receiver,
                             HInstruction* cursor,
                             HBasicBlock* bb_cursor,
                             dex::TypeIndex class_index,
                             Handle<mirror::Class> klass,
                             HInstruction* invoke_instruction,
                             bool with_deoptimization)
    REQUIRES_SHARED(Locks::mutator_lock_);

  /*
   * Ad-hoc implementation of a diamond pattern in the graph for
   * polymorphic inlining:
   * 1) `compare` becomes the input of the new `HIf`.
   * 2) Everything up until `invoke_instruction` is in the then branch (could
   *    contain multiple blocks).
   * 3) `invoke_instruction` is moved to the otherwise block.
   * 4) If `return_replacement` is not null, the merge block will have
   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
   *
   * Before:
   *             Block1
   *             compare
   *              ...
   *         invoke_instruction
   *
   * After:
   *            Block1
   *            compare
   *              if
   *          /        \
   *         /          \
   *   Then block    Otherwise block
   *      ...       invoke_instruction
   *       \              /
   *        \            /
   *          Merge block
   *  phi(return_replacement, invoke_instruction)
   */
  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
                                                HInstruction* return_replacement,
                                                HInstruction* invoke_instruction);

  // Update the inlining budget based on `total_number_of_instructions_`.
  void UpdateInliningBudget();

  // Count the number of calls of `method` being inlined recursively.
  size_t CountRecursiveCallsOf(ArtMethod* method) const;

  // Pretty-print spaces for indentation during logging.
  std::string DepthString(int line) const;

  HGraph* const outermost_graph_;
  const DexCompilationUnit& outer_compilation_unit_;
  const DexCompilationUnit& caller_compilation_unit_;
  CodeGenerator* const codegen_;
  const size_t total_number_of_dex_registers_;
  size_t total_number_of_instructions_;

  // The 'parent' inliner, i.e. the inlining optimization that requested
  // `graph_` to be inlined.
  const HInliner* const parent_;
  const size_t depth_;

  // The budget left for inlining, in number of instructions.
  size_t inlining_budget_;

  // Used to record stats about optimizations on the inlined graph.
  // If the inlining is successful, these stats are merged into the caller graph's stats.
  OptimizingCompilerStats* inline_stats_;

  DISALLOW_COPY_AND_ASSIGN(HInliner);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_INLINER_H_