/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_

#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profiling_info.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"

namespace art {

class CodeGenerator;
class DexCompilationUnit;
class HGraph;
class HInvoke;
class OptimizingCompilerStats;

class HInliner : public HOptimization {
 public:
  HInliner(HGraph* outer_graph,
           HGraph* outermost_graph,
           CodeGenerator* codegen,
           const DexCompilationUnit& outer_compilation_unit,
           const DexCompilationUnit& caller_compilation_unit,
           OptimizingCompilerStats* stats,
           size_t total_number_of_dex_registers,
           size_t total_number_of_instructions,
           HInliner* parent,
           size_t depth = 0,
           const char* name = kInlinerPassName)
      : HOptimization(outer_graph, name, stats),
        outermost_graph_(outermost_graph),
        outer_compilation_unit_(outer_compilation_unit),
        caller_compilation_unit_(caller_compilation_unit),
        codegen_(codegen),
        total_number_of_dex_registers_(total_number_of_dex_registers),
        total_number_of_instructions_(total_number_of_instructions),
        parent_(parent),
        depth_(depth),
        inlining_budget_(0),
        inline_stats_(nullptr) {}

  bool Run() override;

  static constexpr const char* kInlinerPassName = "inliner";
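  //
  // A hypothetical way to drive this pass (illustration only; the real call
  // site lives in the optimizing compiler's pass setup, and `graph`, `codegen`,
  // `dex_compilation_unit`, `compilation_stats` and `accessor` are made-up
  // local names):
  //
  //   HInliner inliner(graph, graph, codegen, dex_compilation_unit,
  //                    dex_compilation_unit, compilation_stats,
  //                    /* total_number_of_dex_registers= */ accessor.RegistersSize(),
  //                    /* total_number_of_instructions= */ accessor.InsnsSizeInCodeUnits(),
  //                    /* parent= */ nullptr);
  //   inliner.Run();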

 private:
  enum InlineCacheType {
    kInlineCacheNoData = 0,
    kInlineCacheUninitialized = 1,
    kInlineCacheMonomorphic = 2,
    kInlineCachePolymorphic = 3,
    kInlineCacheMegamorphic = 4,
    kInlineCacheMissingTypes = 5
  };
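  // A rough, illustrative reading of these states (a sketch only; see
  // GetInlineCacheType below for the authoritative computation):
  //   no profiling data for the invoke             -> kInlineCacheNoData
  //   cache present but no receiver recorded       -> kInlineCacheUninitialized
  //   exactly one receiver class seen (e.g. Foo)   -> kInlineCacheMonomorphic
  //   a few distinct classes seen (Foo, Bar, ...)  -> kInlineCachePolymorphic
  //   more classes than the cache can hold         -> kInlineCacheMegamorphic
  //   recorded classes could not be resolved       -> kInlineCacheMissingTypes
  // `Foo` and `Bar` are hypothetical receiver classes used for illustration.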

  bool TryInline(HInvoke* invoke_instruction);

  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
  // reference type propagation can run after the inlining. If the inlining is successful, this
  // method will replace and remove the `invoke_instruction`.
  bool TryInlineAndReplace(HInvoke* invoke_instruction,
                           ArtMethod* resolved_method,
                           ReferenceTypeInfo receiver_type,
                           bool do_rtp)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryBuildAndInline(HInvoke* invoke_instruction,
                         ArtMethod* resolved_method,
                         ReferenceTypeInfo receiver_type,
                         HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                               ArtMethod* resolved_method,
                               ReferenceTypeInfo receiver_type,
                               HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Substitutes parameters in the callee graph with their values from the caller.
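  // For example (an illustrative sketch): the callee's HParameterValue for the
  // receiver is replaced by the caller's receiver expression at
  // `invoke_instruction`, so later optimizations can exploit a constant or
  // exactly-typed argument.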
  void SubstituteArguments(HGraph* callee_graph,
                           HInvoke* invoke_instruction,
                           ReferenceTypeInfo receiver_type,
                           const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Run simple optimizations on `callee_graph`.
  void RunOptimizations(HGraph* callee_graph,
                        const dex::CodeItem* code_item,
                        const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to recognize known simple patterns and replace the invoke call with
  // appropriate instructions.
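  // An illustrative (not exhaustive) example: a trivial getter such as
  // `int getX() { return x_; }` can be replaced by a single HInstanceFieldGet
  // on the receiver, and a trivial setter by an HInstanceFieldSet (see the
  // CreateInstanceFieldGet/CreateInstanceFieldSet helpers below).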
  bool TryPatternSubstitution(HInvoke* invoke_instruction,
                              ArtMethod* method,
                              HInstruction** return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether inlining is allowed based on ART semantics.
  bool IsInliningAllowed(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether ART supports inlining this method.
  //
  // Some methods are not supported because they have features for which inlining
  // is not implemented. For example, we do not currently support inlining throw
  // instructions into a try block.
  bool IsInliningSupported(const HInvoke* invoke_instruction,
                           art::ArtMethod* method,
                           const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether the inlining budget allows inlining `method`.
  //
  // For example, this checks whether the function has grown too large and
  // inlining should be prevented.
  bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Inspects the body of a method (callee_graph) and returns whether it can be
  // inlined.
  //
  // This checks for instructions and constructs whose inlining we do not
  // support, such as a throw instruction inlined into a try block.
  bool CanInlineBody(const HGraph* callee_graph,
                     const HBasicBlock* target_block,
                     size_t* out_number_of_instructions) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Create a new HInstanceFieldGet.
  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj);
  // Create a new HInstanceFieldSet.
  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
                                            ArtMethod* referrer,
                                            HInstruction* obj,
                                            HInstruction* value,
                                            bool* is_final = nullptr);

  // Try inlining the invoke instruction using inline caches.
  bool TryInlineFromInlineCache(HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try inlining the invoke instruction using CHA.
  bool TryInlineFromCHA(HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // When inlining `invoke_instruction` fails, try to devirtualize the call
  // instead.
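  // Illustrative example (hedged): if the receiver is known to be exactly of a
  // class `Foo` (a hypothetical class), an invoke-virtual of Foo.bar() can be
  // rewritten as a direct call to that single implementation, avoiding the
  // virtual dispatch.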
  bool TryDevirtualize(HInvoke* invoke_instruction,
                       ArtMethod* method,
                       HInvoke** replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the JIT code cache. Fills `classes` with
  // the receiver classes observed at the invoke and returns the resulting
  // InlineCacheType.
  InlineCacheType GetInlineCacheJIT(
      HInvoke* invoke_instruction,
      /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try getting the inline cache from the AOT offline profile. Fills `classes`
  // with the receiver classes recorded in the profile and returns the
  // resulting InlineCacheType.
  InlineCacheType GetInlineCacheAOT(
      HInvoke* invoke_instruction,
      /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Compute the inline cache type.
  static InlineCacheType GetInlineCacheType(
      const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline the target of a monomorphic call. If successful, the code
  // in the graph will look like:
  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
  // ... // inlined code
  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Try to inline targets of a polymorphic call.
  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool TryInlinePolymorphicCallToSameTarget(
      HInvoke* invoke_instruction,
      const StackHandleScope<InlineCache::kIndividualCacheSize>& classes)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
  bool UseOnlyPolymorphicInliningWithNoDeopt();

  // Try CHA-based devirtualization to change virtual method calls into
  // direct calls.
  // Returns the actual method that resolved_method can be devirtualized to.
  ArtMethod* FindMethodFromCHA(ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks the
  // should_deoptimize flag and deoptimizes when it is set.
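  // A rough sketch of the guard shape (assuming the usual flag-plus-deoptimize
  // pattern; the exact instructions are defined in the implementation):
  //   i0 = HShouldDeoptimizeFlag()
  //   i1 = HNotEqual(i0, 0)
  //   HDeoptimize(i1)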
  void AddCHAGuard(HInstruction* invoke_instruction,
                   uint32_t dex_pc,
                   HInstruction* cursor,
                   HBasicBlock* bb_cursor);

  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
                                           HInstruction* receiver,
                                           uint32_t dex_pc) const
    REQUIRES_SHARED(Locks::mutator_lock_);

  void MaybeRunReferenceTypePropagation(HInstruction* replacement,
                                        HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
    REQUIRES_SHARED(Locks::mutator_lock_);

  bool ReturnTypeMoreSpecific(HInstruction* return_replacement, HInvoke* invoke_instruction)
    REQUIRES_SHARED(Locks::mutator_lock_);

  // Add a type guard on the given `receiver`. This will add to the graph:
  // i0 = HFieldGet(receiver, klass)
  // i1 = HLoadClass(class_index, is_referrer)
  // i2 = HNotEqual(i0, i1)
  //
  // And if `with_deoptimization` is true:
  // HDeoptimize(i2)
  //
  // The method returns the `HNotEqual`, which will be used for polymorphic inlining.
  HInstruction* AddTypeGuard(HInstruction* receiver,
                             HInstruction* cursor,
                             HBasicBlock* bb_cursor,
                             dex::TypeIndex class_index,
                             Handle<mirror::Class> klass,
                             HInstruction* invoke_instruction,
                             bool with_deoptimization)
    REQUIRES_SHARED(Locks::mutator_lock_);

  /*
   * Ad-hoc implementation of a diamond pattern in the graph for
   * polymorphic inlining:
   * 1) `compare` becomes the input of the new `HIf`.
   * 2) Everything up until `invoke_instruction` is in the then branch (could
   *    contain multiple blocks).
   * 3) `invoke_instruction` is moved to the otherwise block.
   * 4) If `return_replacement` is not null, the merge block will have
   *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
   *
   * Before:
   *             Block1
   *             compare
   *              ...
   *         invoke_instruction
   *
   * After:
   *            Block1
   *            compare
   *              if
   *          /        \
   *         /          \
   *   Then block    Otherwise block
   *      ...       invoke_instruction
   *       \              /
   *        \            /
   *          Merge block
   *  phi(return_replacement, invoke_instruction)
   */
  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
                                                HInstruction* return_replacement,
                                                HInstruction* invoke_instruction);

  // Update the inlining budget based on `total_number_of_instructions_`.
  void UpdateInliningBudget();

  // Count the number of calls of `method` being inlined recursively.
  size_t CountRecursiveCallsOf(ArtMethod* method) const;

  // Pretty-print helper: returns the indentation (spaces) used when logging at
  // the current inlining depth.
  std::string DepthString(int line) const;

  HGraph* const outermost_graph_;
  const DexCompilationUnit& outer_compilation_unit_;
  const DexCompilationUnit& caller_compilation_unit_;
  CodeGenerator* const codegen_;
  const size_t total_number_of_dex_registers_;
  size_t total_number_of_instructions_;

  // The 'parent' inliner, i.e. the inlining optimization that requested
  // `graph_` to be inlined.
  const HInliner* const parent_;
  const size_t depth_;

  // The budget left for inlining, in number of instructions.
  size_t inlining_budget_;

  // Used to record stats about optimizations on the inlined graph.
  // If the inlining is successful, these stats are merged to the caller graph's stats.
  OptimizingCompilerStats* inline_stats_;

  DISALLOW_COPY_AND_ASSIGN(HInliner);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_INLINER_H_