1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "inliner.h"
18
19 #include "art_method-inl.h"
20 #include "base/enums.h"
21 #include "builder.h"
22 #include "class_linker.h"
23 #include "constant_folding.h"
24 #include "dead_code_elimination.h"
25 #include "dex/inline_method_analyser.h"
26 #include "dex/verified_method.h"
27 #include "dex/verification_results.h"
28 #include "driver/compiler_driver-inl.h"
29 #include "driver/compiler_options.h"
30 #include "driver/dex_compilation_unit.h"
31 #include "instruction_simplifier.h"
32 #include "intrinsics.h"
33 #include "jit/jit.h"
34 #include "jit/jit_code_cache.h"
35 #include "mirror/class_loader.h"
36 #include "mirror/dex_cache.h"
37 #include "nodes.h"
38 #include "optimizing_compiler.h"
39 #include "reference_type_propagation.h"
40 #include "register_allocator_linear_scan.h"
41 #include "sharpening.h"
42 #include "ssa_builder.h"
43 #include "ssa_phi_elimination.h"
44 #include "scoped_thread_state_change-inl.h"
45 #include "thread.h"
46
47 namespace art {
48
49 // Instruction limit to control memory.
50 static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
51
52 // Maximum number of instructions for considering a method small,
53 // which we will always try to inline if the other non-instruction limits
54 // are not reached.
55 static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
56
57 // Limit the number of dex registers that we accumulate while inlining
58 // to avoid creating large amount of nested environments.
59 static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32;
60
61 // Limit recursive call inlining, which do not benefit from too
62 // much inlining compared to code locality.
63 static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
64
65 // Controls the use of inline caches in AOT mode.
66 static constexpr bool kUseAOTInlineCaches = true;
67
68 // We check for line numbers to make sure the DepthString implementation
69 // aligns the output nicely.
70 #define LOG_INTERNAL(msg) \
71 static_assert(__LINE__ > 10, "Unhandled line number"); \
72 static_assert(__LINE__ < 10000, "Unhandled line number"); \
73 VLOG(compiler) << DepthString(__LINE__) << msg
74
75 #define LOG_TRY() LOG_INTERNAL("Try inlinining call: ")
76 #define LOG_NOTE() LOG_INTERNAL("Note: ")
77 #define LOG_SUCCESS() LOG_INTERNAL("Success: ")
78 #define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
79 #define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
80
DepthString(int line) const81 std::string HInliner::DepthString(int line) const {
82 std::string value;
83 // Indent according to the inlining depth.
84 size_t count = depth_;
85 // Line numbers get printed in the log, so add a space if the log's line is less
86 // than 1000, and two if less than 100. 10 cannot be reached as it's the copyright.
87 if (!kIsTargetBuild) {
88 if (line < 100) {
89 value += " ";
90 }
91 if (line < 1000) {
92 value += " ";
93 }
94 // Safeguard if this file reaches more than 10000 lines.
95 DCHECK_LT(line, 10000);
96 }
97 for (size_t i = 0; i < count; ++i) {
98 value += " ";
99 }
100 return value;
101 }
102
CountNumberOfInstructions(HGraph * graph)103 static size_t CountNumberOfInstructions(HGraph* graph) {
104 size_t number_of_instructions = 0;
105 for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
106 for (HInstructionIterator instr_it(block->GetInstructions());
107 !instr_it.Done();
108 instr_it.Advance()) {
109 ++number_of_instructions;
110 }
111 }
112 return number_of_instructions;
113 }
114
UpdateInliningBudget()115 void HInliner::UpdateInliningBudget() {
116 if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
117 // Always try to inline small methods.
118 inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
119 } else {
120 inlining_budget_ = std::max(
121 kMaximumNumberOfInstructionsForSmallMethod,
122 kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
123 }
124 }
125
Run()126 void HInliner::Run() {
127 if (graph_->IsDebuggable()) {
128 // For simplicity, we currently never inline when the graph is debuggable. This avoids
129 // doing some logic in the runtime to discover if a method could have been inlined.
130 return;
131 }
132
133 // Initialize the number of instructions for the method being compiled. Recursive calls
134 // to HInliner::Run have already updated the instruction count.
135 if (outermost_graph_ == graph_) {
136 total_number_of_instructions_ = CountNumberOfInstructions(graph_);
137 }
138
139 UpdateInliningBudget();
140 DCHECK_NE(total_number_of_instructions_, 0u);
141 DCHECK_NE(inlining_budget_, 0u);
142
143 // Keep a copy of all blocks when starting the visit.
144 ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
145 DCHECK(!blocks.empty());
146 // Because we are changing the graph when inlining,
147 // we just iterate over the blocks of the outer method.
148 // This avoids doing the inlining work again on the inlined blocks.
149 for (HBasicBlock* block : blocks) {
150 for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
151 HInstruction* next = instruction->GetNext();
152 HInvoke* call = instruction->AsInvoke();
153 // As long as the call is not intrinsified, it is worth trying to inline.
154 if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
155 if (kIsDebugBuild && IsCompilingWithCoreImage()) {
156 // Debugging case: directives in method names control or assert on inlining.
157 std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
158 call->GetDexMethodIndex(), /* with_signature */ false);
159 // Tests prevent inlining by having $noinline$ in their method names.
160 if (callee_name.find("$noinline$") == std::string::npos) {
161 if (!TryInline(call)) {
162 bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
163 CHECK(!should_have_inlined) << "Could not inline " << callee_name;
164 }
165 }
166 } else {
167 // Normal case: try to inline.
168 TryInline(call);
169 }
170 }
171 instruction = next;
172 }
173 }
174 }
175
IsMethodOrDeclaringClassFinal(ArtMethod * method)176 static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
177 REQUIRES_SHARED(Locks::mutator_lock_) {
178 return method->IsFinal() || method->GetDeclaringClass()->IsFinal();
179 }
180
181 /**
182 * Given the `resolved_method` looked up in the dex cache, try to find
183 * the actual runtime target of an interface or virtual call.
184 * Return nullptr if the runtime target cannot be proven.
185 */
FindVirtualOrInterfaceTarget(HInvoke * invoke,ArtMethod * resolved_method)186 static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method)
187 REQUIRES_SHARED(Locks::mutator_lock_) {
188 if (IsMethodOrDeclaringClassFinal(resolved_method)) {
189 // No need to lookup further, the resolved method will be the target.
190 return resolved_method;
191 }
192
193 HInstruction* receiver = invoke->InputAt(0);
194 if (receiver->IsNullCheck()) {
195 // Due to multiple levels of inlining within the same pass, it might be that
196 // null check does not have the reference type of the actual receiver.
197 receiver = receiver->InputAt(0);
198 }
199 ReferenceTypeInfo info = receiver->GetReferenceTypeInfo();
200 DCHECK(info.IsValid()) << "Invalid RTI for " << receiver->DebugName();
201 if (!info.IsExact()) {
202 // We currently only support inlining with known receivers.
203 // TODO: Remove this check, we should be able to inline final methods
204 // on unknown receivers.
205 return nullptr;
206 } else if (info.GetTypeHandle()->IsInterface()) {
207 // Statically knowing that the receiver has an interface type cannot
208 // help us find what is the target method.
209 return nullptr;
210 } else if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(info.GetTypeHandle().Get())) {
211 // The method that we're trying to call is not in the receiver's class or super classes.
212 return nullptr;
213 } else if (info.GetTypeHandle()->IsErroneous()) {
214 // If the type is erroneous, do not go further, as we are going to query the vtable or
215 // imt table, that we can only safely do on non-erroneous classes.
216 return nullptr;
217 }
218
219 ClassLinker* cl = Runtime::Current()->GetClassLinker();
220 PointerSize pointer_size = cl->GetImagePointerSize();
221 if (invoke->IsInvokeInterface()) {
222 resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface(
223 resolved_method, pointer_size);
224 } else {
225 DCHECK(invoke->IsInvokeVirtual());
226 resolved_method = info.GetTypeHandle()->FindVirtualMethodForVirtual(
227 resolved_method, pointer_size);
228 }
229
230 if (resolved_method == nullptr) {
231 // The information we had on the receiver was not enough to find
232 // the target method. Since we check above the exact type of the receiver,
233 // the only reason this can happen is an IncompatibleClassChangeError.
234 return nullptr;
235 } else if (!resolved_method->IsInvokable()) {
236 // The information we had on the receiver was not enough to find
237 // the target method. Since we check above the exact type of the receiver,
238 // the only reason this can happen is an IncompatibleClassChangeError.
239 return nullptr;
240 } else if (IsMethodOrDeclaringClassFinal(resolved_method)) {
241 // A final method has to be the target method.
242 return resolved_method;
243 } else if (info.IsExact()) {
244 // If we found a method and the receiver's concrete type is statically
245 // known, we know for sure the target.
246 return resolved_method;
247 } else {
248 // Even if we did find a method, the receiver type was not enough to
249 // statically find the runtime target.
250 return nullptr;
251 }
252 }
253
FindMethodIndexIn(ArtMethod * method,const DexFile & dex_file,uint32_t name_and_signature_index)254 static uint32_t FindMethodIndexIn(ArtMethod* method,
255 const DexFile& dex_file,
256 uint32_t name_and_signature_index)
257 REQUIRES_SHARED(Locks::mutator_lock_) {
258 if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
259 return method->GetDexMethodIndex();
260 } else {
261 return method->FindDexMethodIndexInOtherDexFile(dex_file, name_and_signature_index);
262 }
263 }
264
FindClassIndexIn(mirror::Class * cls,const DexCompilationUnit & compilation_unit)265 static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
266 const DexCompilationUnit& compilation_unit)
267 REQUIRES_SHARED(Locks::mutator_lock_) {
268 const DexFile& dex_file = *compilation_unit.GetDexFile();
269 dex::TypeIndex index;
270 if (cls->GetDexCache() == nullptr) {
271 DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
272 index = cls->FindTypeIndexInOtherDexFile(dex_file);
273 } else if (!cls->GetDexTypeIndex().IsValid()) {
274 DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
275 // TODO: deal with proxy classes.
276 } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
277 DCHECK_EQ(cls->GetDexCache(), compilation_unit.GetDexCache().Get());
278 index = cls->GetDexTypeIndex();
279 } else {
280 index = cls->FindTypeIndexInOtherDexFile(dex_file);
281 // We cannot guarantee the entry will resolve to the same class,
282 // as there may be different class loaders. So only return the index if it's
283 // the right class already resolved with the class loader.
284 if (index.IsValid()) {
285 ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType(
286 index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
287 if (resolved != cls) {
288 index = dex::TypeIndex::Invalid();
289 }
290 }
291 }
292
293 return index;
294 }
295
296 class ScopedProfilingInfoInlineUse {
297 public:
ScopedProfilingInfoInlineUse(ArtMethod * method,Thread * self)298 explicit ScopedProfilingInfoInlineUse(ArtMethod* method, Thread* self)
299 : method_(method),
300 self_(self),
301 // Fetch the profiling info ahead of using it. If it's null when fetching,
302 // we should not call JitCodeCache::DoneInlining.
303 profiling_info_(
304 Runtime::Current()->GetJit()->GetCodeCache()->NotifyCompilerUse(method, self)) {
305 }
306
~ScopedProfilingInfoInlineUse()307 ~ScopedProfilingInfoInlineUse() {
308 if (profiling_info_ != nullptr) {
309 PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
310 DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
311 Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
312 }
313 }
314
GetProfilingInfo() const315 ProfilingInfo* GetProfilingInfo() const { return profiling_info_; }
316
317 private:
318 ArtMethod* const method_;
319 Thread* const self_;
320 ProfilingInfo* const profiling_info_;
321 };
322
GetInlineCacheType(const Handle<mirror::ObjectArray<mirror::Class>> & classes)323 HInliner::InlineCacheType HInliner::GetInlineCacheType(
324 const Handle<mirror::ObjectArray<mirror::Class>>& classes)
325 REQUIRES_SHARED(Locks::mutator_lock_) {
326 uint8_t number_of_types = 0;
327 for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) {
328 if (classes->Get(number_of_types) == nullptr) {
329 break;
330 }
331 }
332
333 if (number_of_types == 0) {
334 return kInlineCacheUninitialized;
335 } else if (number_of_types == 1) {
336 return kInlineCacheMonomorphic;
337 } else if (number_of_types == InlineCache::kIndividualCacheSize) {
338 return kInlineCacheMegamorphic;
339 } else {
340 return kInlineCachePolymorphic;
341 }
342 }
343
GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)344 static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
345 REQUIRES_SHARED(Locks::mutator_lock_) {
346 DCHECK(classes->Get(0) != nullptr);
347 return classes->Get(0);
348 }
349
TryCHADevirtualization(ArtMethod * resolved_method)350 ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
351 if (!resolved_method->HasSingleImplementation()) {
352 return nullptr;
353 }
354 if (Runtime::Current()->IsAotCompiler()) {
355 // No CHA-based devirtulization for AOT compiler (yet).
356 return nullptr;
357 }
358 if (outermost_graph_->IsCompilingOsr()) {
359 // We do not support HDeoptimize in OSR methods.
360 return nullptr;
361 }
362 PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
363 ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size);
364 if (single_impl == nullptr) {
365 return nullptr;
366 }
367 if (single_impl->IsProxyMethod()) {
368 // Proxy method is a generic invoker that's not worth
369 // devirtualizing/inlining. It also causes issues when the proxy
370 // method is in another dex file if we try to rewrite invoke-interface to
371 // invoke-virtual because a proxy method doesn't have a real dex file.
372 return nullptr;
373 }
374 if (!single_impl->GetDeclaringClass()->IsResolved()) {
375 // There's a race with the class loading, which updates the CHA info
376 // before setting the class to resolved. So we just bail for this
377 // rare occurence.
378 return nullptr;
379 }
380 return single_impl;
381 }
382
TryInline(HInvoke * invoke_instruction)383 bool HInliner::TryInline(HInvoke* invoke_instruction) {
384 if (invoke_instruction->IsInvokeUnresolved() ||
385 invoke_instruction->IsInvokePolymorphic()) {
386 return false; // Don't bother to move further if we know the method is unresolved or an
387 // invoke-polymorphic.
388 }
389
390 ScopedObjectAccess soa(Thread::Current());
391 uint32_t method_index = invoke_instruction->GetDexMethodIndex();
392 const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
393 LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
394
395 ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
396 if (resolved_method == nullptr) {
397 DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
398 DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
399 LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
400 return false;
401 }
402 ArtMethod* actual_method = nullptr;
403
404 if (invoke_instruction->IsInvokeStaticOrDirect()) {
405 actual_method = resolved_method;
406 } else {
407 // Check if we can statically find the method.
408 actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
409 }
410
411 bool cha_devirtualize = false;
412 if (actual_method == nullptr) {
413 ArtMethod* method = TryCHADevirtualization(resolved_method);
414 if (method != nullptr) {
415 cha_devirtualize = true;
416 actual_method = method;
417 LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
418 }
419 }
420
421 if (actual_method != nullptr) {
422 bool result = TryInlineAndReplace(invoke_instruction,
423 actual_method,
424 ReferenceTypeInfo::CreateInvalid(),
425 /* do_rtp */ true,
426 cha_devirtualize);
427 if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
428 if (cha_devirtualize) {
429 // Add dependency due to devirtulization. We've assumed resolved_method
430 // has single implementation.
431 outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
432 MaybeRecordStat(kCHAInline);
433 } else {
434 MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
435 }
436 }
437 return result;
438 }
439 DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
440
441 // Try using inline caches.
442 return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method);
443 }
444
AllocateInlineCacheHolder(const DexCompilationUnit & compilation_unit,StackHandleScope<1> * hs)445 static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
446 const DexCompilationUnit& compilation_unit,
447 StackHandleScope<1>* hs)
448 REQUIRES_SHARED(Locks::mutator_lock_) {
449 Thread* self = Thread::Current();
450 ClassLinker* class_linker = compilation_unit.GetClassLinker();
451 Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
452 mirror::ObjectArray<mirror::Class>::Alloc(
453 self,
454 class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
455 InlineCache::kIndividualCacheSize));
456 if (inline_cache == nullptr) {
457 // We got an OOME. Just clear the exception, and don't inline.
458 DCHECK(self->IsExceptionPending());
459 self->ClearException();
460 VLOG(compiler) << "Out of memory in the compiler when trying to inline";
461 }
462 return inline_cache;
463 }
464
UseOnlyPolymorphicInliningWithNoDeopt()465 bool HInliner::UseOnlyPolymorphicInliningWithNoDeopt() {
466 // If we are compiling AOT or OSR, pretend the call using inline caches is polymorphic and
467 // do not generate a deopt.
468 //
469 // For AOT:
470 // Generating a deopt does not ensure that we will actually capture the new types;
471 // and the danger is that we could be stuck in a loop with "forever" deoptimizations.
472 // Take for example the following scenario:
473 // - we capture the inline cache in one run
474 // - the next run, we deoptimize because we miss a type check, but the method
475 // never becomes hot again
476 // In this case, the inline cache will not be updated in the profile and the AOT code
477 // will keep deoptimizing.
478 // Another scenario is if we use profile compilation for a process which is not allowed
479 // to JIT (e.g. system server). If we deoptimize we will run interpreted code for the
480 // rest of the lifetime.
481 // TODO(calin):
482 // This is a compromise because we will most likely never update the inline cache
483 // in the profile (unless there's another reason to deopt). So we might be stuck with
484 // a sub-optimal inline cache.
485 // We could be smarter when capturing inline caches to mitigate this.
486 // (e.g. by having different thresholds for new and old methods).
487 //
488 // For OSR:
489 // We may come from the interpreter and it may have seen different receiver types.
490 return Runtime::Current()->IsAotCompiler() || outermost_graph_->IsCompilingOsr();
491 }
TryInlineFromInlineCache(const DexFile & caller_dex_file,HInvoke * invoke_instruction,ArtMethod * resolved_method)492 bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
493 HInvoke* invoke_instruction,
494 ArtMethod* resolved_method)
495 REQUIRES_SHARED(Locks::mutator_lock_) {
496 if (Runtime::Current()->IsAotCompiler() && !kUseAOTInlineCaches) {
497 return false;
498 }
499
500 StackHandleScope<1> hs(Thread::Current());
501 Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
502 InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
503 ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
504 : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
505
506 switch (inline_cache_type) {
507 case kInlineCacheNoData: {
508 LOG_FAIL_NO_STAT()
509 << "Interface or virtual call to "
510 << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
511 << " could not be statically determined";
512 return false;
513 }
514
515 case kInlineCacheUninitialized: {
516 LOG_FAIL_NO_STAT()
517 << "Interface or virtual call to "
518 << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
519 << " is not hit and not inlined";
520 return false;
521 }
522
523 case kInlineCacheMonomorphic: {
524 MaybeRecordStat(kMonomorphicCall);
525 if (UseOnlyPolymorphicInliningWithNoDeopt()) {
526 return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
527 } else {
528 return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
529 }
530 }
531
532 case kInlineCachePolymorphic: {
533 MaybeRecordStat(kPolymorphicCall);
534 return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
535 }
536
537 case kInlineCacheMegamorphic: {
538 LOG_FAIL_NO_STAT()
539 << "Interface or virtual call to "
540 << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
541 << " is megamorphic and not inlined";
542 MaybeRecordStat(kMegamorphicCall);
543 return false;
544 }
545
546 case kInlineCacheMissingTypes: {
547 LOG_FAIL_NO_STAT()
548 << "Interface or virtual call to "
549 << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
550 << " is missing types and not inlined";
551 return false;
552 }
553 }
554 UNREACHABLE();
555 }
556
GetInlineCacheJIT(HInvoke * invoke_instruction,StackHandleScope<1> * hs,Handle<mirror::ObjectArray<mirror::Class>> * inline_cache)557 HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
558 HInvoke* invoke_instruction,
559 StackHandleScope<1>* hs,
560 /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
561 REQUIRES_SHARED(Locks::mutator_lock_) {
562 DCHECK(Runtime::Current()->UseJitCompilation());
563
564 ArtMethod* caller = graph_->GetArtMethod();
565 // Under JIT, we should always know the caller.
566 DCHECK(caller != nullptr);
567 ScopedProfilingInfoInlineUse spiis(caller, Thread::Current());
568 ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
569
570 if (profiling_info == nullptr) {
571 return kInlineCacheNoData;
572 }
573
574 *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
575 if (inline_cache->Get() == nullptr) {
576 // We can't extract any data if we failed to allocate;
577 return kInlineCacheNoData;
578 } else {
579 Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
580 *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
581 *inline_cache);
582 return GetInlineCacheType(*inline_cache);
583 }
584 }
585
GetInlineCacheAOT(const DexFile & caller_dex_file,HInvoke * invoke_instruction,StackHandleScope<1> * hs,Handle<mirror::ObjectArray<mirror::Class>> * inline_cache)586 HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
587 const DexFile& caller_dex_file,
588 HInvoke* invoke_instruction,
589 StackHandleScope<1>* hs,
590 /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
591 REQUIRES_SHARED(Locks::mutator_lock_) {
592 DCHECK(Runtime::Current()->IsAotCompiler());
593 const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
594 if (pci == nullptr) {
595 return kInlineCacheNoData;
596 }
597
598 std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile =
599 pci->GetMethod(caller_dex_file.GetLocation(),
600 caller_dex_file.GetLocationChecksum(),
601 caller_compilation_unit_.GetDexMethodIndex());
602 if (offline_profile == nullptr) {
603 return kInlineCacheNoData; // no profile information for this invocation.
604 }
605
606 *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
607 if (inline_cache == nullptr) {
608 // We can't extract any data if we failed to allocate;
609 return kInlineCacheNoData;
610 } else {
611 return ExtractClassesFromOfflineProfile(invoke_instruction,
612 *(offline_profile.get()),
613 *inline_cache);
614 }
615 }
616
ExtractClassesFromOfflineProfile(const HInvoke * invoke_instruction,const ProfileCompilationInfo::OfflineProfileMethodInfo & offline_profile,Handle<mirror::ObjectArray<mirror::Class>> inline_cache)617 HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
618 const HInvoke* invoke_instruction,
619 const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
620 /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
621 REQUIRES_SHARED(Locks::mutator_lock_) {
622 const auto it = offline_profile.inline_caches->find(invoke_instruction->GetDexPc());
623 if (it == offline_profile.inline_caches->end()) {
624 return kInlineCacheUninitialized;
625 }
626
627 const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second;
628
629 if (dex_pc_data.is_missing_types) {
630 return kInlineCacheMissingTypes;
631 }
632 if (dex_pc_data.is_megamorphic) {
633 return kInlineCacheMegamorphic;
634 }
635
636 DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize);
637 Thread* self = Thread::Current();
638 // We need to resolve the class relative to the containing dex file.
639 // So first, build a mapping from the index of dex file in the profile to
640 // its dex cache. This will avoid repeating the lookup when walking over
641 // the inline cache types.
642 std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache(
643 offline_profile.dex_references.size());
644 for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
645 bool found = false;
646 for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
647 if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
648 dex_profile_index_to_dex_cache[i] =
649 caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
650 found = true;
651 }
652 }
653 if (!found) {
654 VLOG(compiler) << "Could not find profiled dex file: "
655 << offline_profile.dex_references[i].dex_location;
656 return kInlineCacheMissingTypes;
657 }
658 }
659
660 // Walk over the classes and resolve them. If we cannot find a type we return
661 // kInlineCacheMissingTypes.
662 int ic_index = 0;
663 for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) {
664 ObjPtr<mirror::DexCache> dex_cache =
665 dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
666 DCHECK(dex_cache != nullptr);
667
668 if (!dex_cache->GetDexFile()->IsTypeIndexValid(class_ref.type_index)) {
669 VLOG(compiler) << "Profile data corrupt: type index " << class_ref.type_index
670 << "is invalid in location" << dex_cache->GetDexFile()->GetLocation();
671 return kInlineCacheNoData;
672 }
673 ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
674 class_ref.type_index,
675 dex_cache,
676 caller_compilation_unit_.GetClassLoader().Get());
677 if (clazz != nullptr) {
678 inline_cache->Set(ic_index++, clazz);
679 } else {
680 VLOG(compiler) << "Could not resolve class from inline cache in AOT mode "
681 << caller_compilation_unit_.GetDexFile()->PrettyMethod(
682 invoke_instruction->GetDexMethodIndex()) << " : "
683 << caller_compilation_unit_
684 .GetDexFile()->StringByTypeIdx(class_ref.type_index);
685 return kInlineCacheMissingTypes;
686 }
687 }
688 return GetInlineCacheType(inline_cache);
689 }
690
BuildGetReceiverClass(ClassLinker * class_linker,HInstruction * receiver,uint32_t dex_pc) const691 HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
692 HInstruction* receiver,
693 uint32_t dex_pc) const {
694 ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
695 DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
696 HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
697 receiver,
698 field,
699 Primitive::kPrimNot,
700 field->GetOffset(),
701 field->IsVolatile(),
702 field->GetDexFieldIndex(),
703 field->GetDeclaringClass()->GetDexClassDefIndex(),
704 *field->GetDexFile(),
705 dex_pc);
706 // The class of a field is effectively final, and does not have any memory dependencies.
707 result->SetSideEffects(SideEffects::None());
708 return result;
709 }
710
ResolveMethodFromInlineCache(Handle<mirror::Class> klass,ArtMethod * resolved_method,HInstruction * invoke_instruction,PointerSize pointer_size)711 static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
712 ArtMethod* resolved_method,
713 HInstruction* invoke_instruction,
714 PointerSize pointer_size)
715 REQUIRES_SHARED(Locks::mutator_lock_) {
716 if (Runtime::Current()->IsAotCompiler()) {
717 // We can get unrelated types when working with profiles (corruption,
718 // systme updates, or anyone can write to it). So first check if the class
719 // actually implements the declaring class of the method that is being
720 // called in bytecode.
721 // Note: the lookup methods used below require to have assignable types.
722 if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
723 return nullptr;
724 }
725 }
726
727 if (invoke_instruction->IsInvokeInterface()) {
728 resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
729 } else {
730 DCHECK(invoke_instruction->IsInvokeVirtual());
731 resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
732 }
733 DCHECK(resolved_method != nullptr);
734 return resolved_method;
735 }
736
TryInlineMonomorphicCall(HInvoke * invoke_instruction,ArtMethod * resolved_method,Handle<mirror::ObjectArray<mirror::Class>> classes)737 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
738 ArtMethod* resolved_method,
739 Handle<mirror::ObjectArray<mirror::Class>> classes) {
740 DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
741 << invoke_instruction->DebugName();
742
743 dex::TypeIndex class_index = FindClassIndexIn(
744 GetMonomorphicType(classes), caller_compilation_unit_);
745 if (!class_index.IsValid()) {
746 LOG_FAIL(kNotInlinedDexCache)
747 << "Call to " << ArtMethod::PrettyMethod(resolved_method)
748 << " from inline cache is not inlined because its class is not"
749 << " accessible to the caller";
750 return false;
751 }
752
753 ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
754 PointerSize pointer_size = class_linker->GetImagePointerSize();
755 Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
756 resolved_method = ResolveMethodFromInlineCache(
757 monomorphic_type, resolved_method, invoke_instruction, pointer_size);
758
759 LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
760 if (resolved_method == nullptr) {
761 // Bogus AOT profile, bail.
762 DCHECK(Runtime::Current()->IsAotCompiler());
763 return false;
764 }
765
766 HInstruction* receiver = invoke_instruction->InputAt(0);
767 HInstruction* cursor = invoke_instruction->GetPrevious();
768 HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
769 if (!TryInlineAndReplace(invoke_instruction,
770 resolved_method,
771 ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
772 /* do_rtp */ false,
773 /* cha_devirtualize */ false)) {
774 return false;
775 }
776
777 // We successfully inlined, now add a guard.
778 AddTypeGuard(receiver,
779 cursor,
780 bb_cursor,
781 class_index,
782 monomorphic_type,
783 invoke_instruction,
784 /* with_deoptimization */ true);
785
786 // Run type propagation to get the guard typed, and eventually propagate the
787 // type of the receiver.
788 ReferenceTypePropagation rtp_fixup(graph_,
789 outer_compilation_unit_.GetClassLoader(),
790 outer_compilation_unit_.GetDexCache(),
791 handles_,
792 /* is_first_run */ false);
793 rtp_fixup.Run();
794
795 MaybeRecordStat(kInlinedMonomorphicCall);
796 return true;
797 }
798
AddCHAGuard(HInstruction * invoke_instruction,uint32_t dex_pc,HInstruction * cursor,HBasicBlock * bb_cursor)799 void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
800 uint32_t dex_pc,
801 HInstruction* cursor,
802 HBasicBlock* bb_cursor) {
803 HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena())
804 HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
805 HInstruction* compare = new (graph_->GetArena()) HNotEqual(
806 deopt_flag, graph_->GetIntConstant(0, dex_pc));
807 HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
808 graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc);
809
810 if (cursor != nullptr) {
811 bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
812 } else {
813 bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
814 }
815 bb_cursor->InsertInstructionAfter(compare, deopt_flag);
816 bb_cursor->InsertInstructionAfter(deopt, compare);
817
818 // Add receiver as input to aid CHA guard optimization later.
819 deopt_flag->AddInput(invoke_instruction->InputAt(0));
820 DCHECK_EQ(deopt_flag->InputCount(), 1u);
821 deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
822 outermost_graph_->IncrementNumberOfCHAGuards();
823 }
824
AddTypeGuard(HInstruction * receiver,HInstruction * cursor,HBasicBlock * bb_cursor,dex::TypeIndex class_index,Handle<mirror::Class> klass,HInstruction * invoke_instruction,bool with_deoptimization)825 HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
826 HInstruction* cursor,
827 HBasicBlock* bb_cursor,
828 dex::TypeIndex class_index,
829 Handle<mirror::Class> klass,
830 HInstruction* invoke_instruction,
831 bool with_deoptimization) {
832 ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
833 HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
834 class_linker, receiver, invoke_instruction->GetDexPc());
835 if (cursor != nullptr) {
836 bb_cursor->InsertInstructionAfter(receiver_class, cursor);
837 } else {
838 bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
839 }
840
841 const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
842 bool is_referrer;
843 ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod();
844 if (outermost_art_method == nullptr) {
845 DCHECK(Runtime::Current()->IsAotCompiler());
846 // We are in AOT mode and we don't have an ART method to determine
847 // if the inlined method belongs to the referrer. Assume it doesn't.
848 is_referrer = false;
849 } else {
850 is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass();
851 }
852
853 // Note that we will just compare the classes, so we don't need Java semantics access checks.
854 // Note that the type index and the dex file are relative to the method this type guard is
855 // inlined into.
856 HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
857 class_index,
858 caller_dex_file,
859 klass,
860 is_referrer,
861 invoke_instruction->GetDexPc(),
862 /* needs_access_check */ false);
863 HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
864 load_class, codegen_, compiler_driver_, caller_compilation_unit_);
865 DCHECK(kind != HLoadClass::LoadKind::kInvalid)
866 << "We should always be able to reference a class for inline caches";
867 // Insert before setting the kind, as setting the kind affects the inputs.
868 bb_cursor->InsertInstructionAfter(load_class, receiver_class);
869 load_class->SetLoadKind(kind);
870 // In AOT mode, we will most likely load the class from BSS, which will involve a call
871 // to the runtime. In this case, the load instruction will need an environment so copy
872 // it from the invoke instruction.
873 if (load_class->NeedsEnvironment()) {
874 DCHECK(Runtime::Current()->IsAotCompiler());
875 load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
876 }
877
878 HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
879 bb_cursor->InsertInstructionAfter(compare, load_class);
880 if (with_deoptimization) {
881 HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
882 graph_->GetArena(),
883 compare,
884 receiver,
885 Runtime::Current()->IsAotCompiler()
886 ? DeoptimizationKind::kAotInlineCache
887 : DeoptimizationKind::kJitInlineCache,
888 invoke_instruction->GetDexPc());
889 bb_cursor->InsertInstructionAfter(deoptimize, compare);
890 deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
891 DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
892 receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
893 deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
894 }
895 return compare;
896 }
897
TryInlinePolymorphicCall(HInvoke * invoke_instruction,ArtMethod * resolved_method,Handle<mirror::ObjectArray<mirror::Class>> classes)898 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
899 ArtMethod* resolved_method,
900 Handle<mirror::ObjectArray<mirror::Class>> classes) {
901 DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
902 << invoke_instruction->DebugName();
903
904 if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, classes)) {
905 return true;
906 }
907
908 ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
909 PointerSize pointer_size = class_linker->GetImagePointerSize();
910
911 bool all_targets_inlined = true;
912 bool one_target_inlined = false;
913 for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
914 if (classes->Get(i) == nullptr) {
915 break;
916 }
917 ArtMethod* method = nullptr;
918
919 Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
920 method = ResolveMethodFromInlineCache(
921 handle, resolved_method, invoke_instruction, pointer_size);
922 if (method == nullptr) {
923 DCHECK(Runtime::Current()->IsAotCompiler());
924 // AOT profile is bogus. This loop expects to iterate over all entries,
925 // so just just continue.
926 all_targets_inlined = false;
927 continue;
928 }
929
930 HInstruction* receiver = invoke_instruction->InputAt(0);
931 HInstruction* cursor = invoke_instruction->GetPrevious();
932 HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
933
934 dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
935 HInstruction* return_replacement = nullptr;
936 LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
937 if (!class_index.IsValid() ||
938 !TryBuildAndInline(invoke_instruction,
939 method,
940 ReferenceTypeInfo::Create(handle, /* is_exact */ true),
941 &return_replacement)) {
942 all_targets_inlined = false;
943 } else {
944 one_target_inlined = true;
945
946 LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
947 << " has inlined " << ArtMethod::PrettyMethod(method);
948
949 // If we have inlined all targets before, and this receiver is the last seen,
950 // we deoptimize instead of keeping the original invoke instruction.
951 bool deoptimize = !UseOnlyPolymorphicInliningWithNoDeopt() &&
952 all_targets_inlined &&
953 (i != InlineCache::kIndividualCacheSize - 1) &&
954 (classes->Get(i + 1) == nullptr);
955
956 HInstruction* compare = AddTypeGuard(receiver,
957 cursor,
958 bb_cursor,
959 class_index,
960 handle,
961 invoke_instruction,
962 deoptimize);
963 if (deoptimize) {
964 if (return_replacement != nullptr) {
965 invoke_instruction->ReplaceWith(return_replacement);
966 }
967 invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
968 // Because the inline cache data can be populated concurrently, we force the end of the
969 // iteration. Otherwise, we could see a new receiver type.
970 break;
971 } else {
972 CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
973 }
974 }
975 }
976
977 if (!one_target_inlined) {
978 LOG_FAIL_NO_STAT()
979 << "Call to " << ArtMethod::PrettyMethod(resolved_method)
980 << " from inline cache is not inlined because none"
981 << " of its targets could be inlined";
982 return false;
983 }
984
985 MaybeRecordStat(kInlinedPolymorphicCall);
986
987 // Run type propagation to get the guards typed.
988 ReferenceTypePropagation rtp_fixup(graph_,
989 outer_compilation_unit_.GetClassLoader(),
990 outer_compilation_unit_.GetDexCache(),
991 handles_,
992 /* is_first_run */ false);
993 rtp_fixup.Run();
994 return true;
995 }
996
CreateDiamondPatternForPolymorphicInline(HInstruction * compare,HInstruction * return_replacement,HInstruction * invoke_instruction)997 void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
998 HInstruction* return_replacement,
999 HInstruction* invoke_instruction) {
1000 uint32_t dex_pc = invoke_instruction->GetDexPc();
1001 HBasicBlock* cursor_block = compare->GetBlock();
1002 HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
1003 ArenaAllocator* allocator = graph_->GetArena();
1004
1005 // Spit the block after the compare: `cursor_block` will now be the start of the diamond,
1006 // and the returned block is the start of the then branch (that could contain multiple blocks).
1007 HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
1008
1009 // Split the block containing the invoke before and after the invoke. The returned block
1010 // of the split before will contain the invoke and will be the otherwise branch of
1011 // the diamond. The returned block of the split after will be the merge block
1012 // of the diamond.
1013 HBasicBlock* end_then = invoke_instruction->GetBlock();
1014 HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
1015 HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
1016
1017 // If the methods we are inlining return a value, we create a phi in the merge block
1018 // that will have the `invoke_instruction and the `return_replacement` as inputs.
1019 if (return_replacement != nullptr) {
1020 HPhi* phi = new (allocator) HPhi(
1021 allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
1022 merge->AddPhi(phi);
1023 invoke_instruction->ReplaceWith(phi);
1024 phi->AddInput(return_replacement);
1025 phi->AddInput(invoke_instruction);
1026 }
1027
1028 // Add the control flow instructions.
1029 otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
1030 end_then->AddInstruction(new (allocator) HGoto(dex_pc));
1031 cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
1032
1033 // Add the newly created blocks to the graph.
1034 graph_->AddBlock(then);
1035 graph_->AddBlock(otherwise);
1036 graph_->AddBlock(merge);
1037
1038 // Set up successor (and implictly predecessor) relations.
1039 cursor_block->AddSuccessor(otherwise);
1040 cursor_block->AddSuccessor(then);
1041 end_then->AddSuccessor(merge);
1042 otherwise->AddSuccessor(merge);
1043
1044 // Set up dominance information.
1045 then->SetDominator(cursor_block);
1046 cursor_block->AddDominatedBlock(then);
1047 otherwise->SetDominator(cursor_block);
1048 cursor_block->AddDominatedBlock(otherwise);
1049 merge->SetDominator(cursor_block);
1050 cursor_block->AddDominatedBlock(merge);
1051
1052 // Update the revert post order.
1053 size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
1054 MakeRoomFor(&graph_->reverse_post_order_, 1, index);
1055 graph_->reverse_post_order_[++index] = then;
1056 index = IndexOfElement(graph_->reverse_post_order_, end_then);
1057 MakeRoomFor(&graph_->reverse_post_order_, 2, index);
1058 graph_->reverse_post_order_[++index] = otherwise;
1059 graph_->reverse_post_order_[++index] = merge;
1060
1061
1062 graph_->UpdateLoopAndTryInformationOfNewBlock(
1063 then, original_invoke_block, /* replace_if_back_edge */ false);
1064 graph_->UpdateLoopAndTryInformationOfNewBlock(
1065 otherwise, original_invoke_block, /* replace_if_back_edge */ false);
1066
1067 // In case the original invoke location was a back edge, we need to update
1068 // the loop to now have the merge block as a back edge.
1069 graph_->UpdateLoopAndTryInformationOfNewBlock(
1070 merge, original_invoke_block, /* replace_if_back_edge */ true);
1071 }
1072
TryInlinePolymorphicCallToSameTarget(HInvoke * invoke_instruction,ArtMethod * resolved_method,Handle<mirror::ObjectArray<mirror::Class>> classes)1073 bool HInliner::TryInlinePolymorphicCallToSameTarget(
1074 HInvoke* invoke_instruction,
1075 ArtMethod* resolved_method,
1076 Handle<mirror::ObjectArray<mirror::Class>> classes) {
1077 // This optimization only works under JIT for now.
1078 if (!Runtime::Current()->UseJitCompilation()) {
1079 return false;
1080 }
1081
1082 ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
1083 PointerSize pointer_size = class_linker->GetImagePointerSize();
1084
1085 DCHECK(resolved_method != nullptr);
1086 ArtMethod* actual_method = nullptr;
1087 size_t method_index = invoke_instruction->IsInvokeVirtual()
1088 ? invoke_instruction->AsInvokeVirtual()->GetVTableIndex()
1089 : invoke_instruction->AsInvokeInterface()->GetImtIndex();
1090
1091 // Check whether we are actually calling the same method among
1092 // the different types seen.
1093 for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
1094 if (classes->Get(i) == nullptr) {
1095 break;
1096 }
1097 ArtMethod* new_method = nullptr;
1098 if (invoke_instruction->IsInvokeInterface()) {
1099 new_method = classes->Get(i)->GetImt(pointer_size)->Get(
1100 method_index, pointer_size);
1101 if (new_method->IsRuntimeMethod()) {
1102 // Bail out as soon as we see a conflict trampoline in one of the target's
1103 // interface table.
1104 return false;
1105 }
1106 } else {
1107 DCHECK(invoke_instruction->IsInvokeVirtual());
1108 new_method = classes->Get(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
1109 }
1110 DCHECK(new_method != nullptr);
1111 if (actual_method == nullptr) {
1112 actual_method = new_method;
1113 } else if (actual_method != new_method) {
1114 // Different methods, bailout.
1115 return false;
1116 }
1117 }
1118
1119 HInstruction* receiver = invoke_instruction->InputAt(0);
1120 HInstruction* cursor = invoke_instruction->GetPrevious();
1121 HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
1122
1123 HInstruction* return_replacement = nullptr;
1124 if (!TryBuildAndInline(invoke_instruction,
1125 actual_method,
1126 ReferenceTypeInfo::CreateInvalid(),
1127 &return_replacement)) {
1128 return false;
1129 }
1130
1131 // We successfully inlined, now add a guard.
1132 HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
1133 class_linker, receiver, invoke_instruction->GetDexPc());
1134
1135 Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet())
1136 ? Primitive::kPrimLong
1137 : Primitive::kPrimInt;
1138 HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet(
1139 receiver_class,
1140 type,
1141 invoke_instruction->IsInvokeVirtual() ? HClassTableGet::TableKind::kVTable
1142 : HClassTableGet::TableKind::kIMTable,
1143 method_index,
1144 invoke_instruction->GetDexPc());
1145
1146 HConstant* constant;
1147 if (type == Primitive::kPrimLong) {
1148 constant = graph_->GetLongConstant(
1149 reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
1150 } else {
1151 constant = graph_->GetIntConstant(
1152 reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
1153 }
1154
1155 HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
1156 if (cursor != nullptr) {
1157 bb_cursor->InsertInstructionAfter(receiver_class, cursor);
1158 } else {
1159 bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
1160 }
1161 bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
1162 bb_cursor->InsertInstructionAfter(compare, class_table_get);
1163
1164 if (outermost_graph_->IsCompilingOsr()) {
1165 CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
1166 } else {
1167 HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
1168 graph_->GetArena(),
1169 compare,
1170 receiver,
1171 DeoptimizationKind::kJitSameTarget,
1172 invoke_instruction->GetDexPc());
1173 bb_cursor->InsertInstructionAfter(deoptimize, compare);
1174 deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
1175 if (return_replacement != nullptr) {
1176 invoke_instruction->ReplaceWith(return_replacement);
1177 }
1178 receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
1179 invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
1180 deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
1181 }
1182
1183 // Run type propagation to get the guard typed.
1184 ReferenceTypePropagation rtp_fixup(graph_,
1185 outer_compilation_unit_.GetClassLoader(),
1186 outer_compilation_unit_.GetDexCache(),
1187 handles_,
1188 /* is_first_run */ false);
1189 rtp_fixup.Run();
1190
1191 MaybeRecordStat(kInlinedPolymorphicCall);
1192
1193 LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
1194 return true;
1195 }
1196
TryInlineAndReplace(HInvoke * invoke_instruction,ArtMethod * method,ReferenceTypeInfo receiver_type,bool do_rtp,bool cha_devirtualize)1197 bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
1198 ArtMethod* method,
1199 ReferenceTypeInfo receiver_type,
1200 bool do_rtp,
1201 bool cha_devirtualize) {
1202 HInstruction* return_replacement = nullptr;
1203 uint32_t dex_pc = invoke_instruction->GetDexPc();
1204 HInstruction* cursor = invoke_instruction->GetPrevious();
1205 HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
1206 if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
1207 if (invoke_instruction->IsInvokeInterface()) {
1208 DCHECK(!method->IsProxyMethod());
1209 // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
1210 // better than an invoke-interface because:
1211 // 1) In the best case, the interface call has one more indirection (to fetch the IMT).
1212 // 2) We will not go to the conflict trampoline with an invoke-virtual.
1213 // TODO: Consider sharpening once it is not dependent on the compiler driver.
1214
1215 if (method->IsDefault() && !method->IsCopied()) {
1216 // Changing to invoke-virtual cannot be done on an original default method
1217 // since it's not in any vtable. Devirtualization by exact type/inline-cache
1218 // always uses a method in the iftable which is never an original default
1219 // method.
1220 // On the other hand, inlining an original default method by CHA is fine.
1221 DCHECK(cha_devirtualize);
1222 return false;
1223 }
1224
1225 const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
1226 uint32_t dex_method_index = FindMethodIndexIn(
1227 method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
1228 if (dex_method_index == DexFile::kDexNoIndex) {
1229 return false;
1230 }
1231 HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual(
1232 graph_->GetArena(),
1233 invoke_instruction->GetNumberOfArguments(),
1234 invoke_instruction->GetType(),
1235 invoke_instruction->GetDexPc(),
1236 dex_method_index,
1237 method,
1238 method->GetMethodIndex());
1239 HInputsRef inputs = invoke_instruction->GetInputs();
1240 for (size_t index = 0; index != inputs.size(); ++index) {
1241 new_invoke->SetArgumentAt(index, inputs[index]);
1242 }
1243 invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
1244 new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
1245 if (invoke_instruction->GetType() == Primitive::kPrimNot) {
1246 new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
1247 }
1248 return_replacement = new_invoke;
1249 } else {
1250 // TODO: Consider sharpening an invoke virtual once it is not dependent on the
1251 // compiler driver.
1252 return false;
1253 }
1254 }
1255 if (cha_devirtualize) {
1256 AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
1257 }
1258 if (return_replacement != nullptr) {
1259 invoke_instruction->ReplaceWith(return_replacement);
1260 }
1261 invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
1262 FixUpReturnReferenceType(method, return_replacement);
1263 if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
1264 // Actual return value has a more specific type than the method's declared
1265 // return type. Run RTP again on the outer graph to propagate it.
1266 ReferenceTypePropagation(graph_,
1267 outer_compilation_unit_.GetClassLoader(),
1268 outer_compilation_unit_.GetDexCache(),
1269 handles_,
1270 /* is_first_run */ false).Run();
1271 }
1272 return true;
1273 }
1274
CountRecursiveCallsOf(ArtMethod * method) const1275 size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
1276 const HInliner* current = this;
1277 size_t count = 0;
1278 do {
1279 if (current->graph_->GetArtMethod() == method) {
1280 ++count;
1281 }
1282 current = current->parent_;
1283 } while (current != nullptr);
1284 return count;
1285 }
1286
TryBuildAndInline(HInvoke * invoke_instruction,ArtMethod * method,ReferenceTypeInfo receiver_type,HInstruction ** return_replacement)1287 bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
1288 ArtMethod* method,
1289 ReferenceTypeInfo receiver_type,
1290 HInstruction** return_replacement) {
1291 if (method->IsProxyMethod()) {
1292 LOG_FAIL(kNotInlinedProxy)
1293 << "Method " << method->PrettyMethod()
1294 << " is not inlined because of unimplemented inline support for proxy methods.";
1295 return false;
1296 }
1297
1298 if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
1299 LOG_FAIL(kNotInlinedRecursiveBudget)
1300 << "Method "
1301 << method->PrettyMethod()
1302 << " is not inlined because it has reached its recursive call budget.";
1303 return false;
1304 }
1305
1306 // Check whether we're allowed to inline. The outermost compilation unit is the relevant
1307 // dex file here (though the transitivity of an inline chain would allow checking the calller).
1308 if (!compiler_driver_->MayInline(method->GetDexFile(),
1309 outer_compilation_unit_.GetDexFile())) {
1310 if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
1311 LOG_SUCCESS() << "Successfully replaced pattern of invoke "
1312 << method->PrettyMethod();
1313 MaybeRecordStat(kReplacedInvokeWithSimplePattern);
1314 return true;
1315 }
1316 LOG_FAIL(kNotInlinedWont)
1317 << "Won't inline " << method->PrettyMethod() << " in "
1318 << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
1319 << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
1320 << method->GetDexFile()->GetLocation();
1321 return false;
1322 }
1323
1324 bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
1325
1326 const DexFile::CodeItem* code_item = method->GetCodeItem();
1327
1328 if (code_item == nullptr) {
1329 LOG_FAIL_NO_STAT()
1330 << "Method " << method->PrettyMethod() << " is not inlined because it is native";
1331 return false;
1332 }
1333
1334 size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
1335 if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
1336 LOG_FAIL(kNotInlinedCodeItem)
1337 << "Method " << method->PrettyMethod()
1338 << " is not inlined because its code item is too big: "
1339 << code_item->insns_size_in_code_units_
1340 << " > "
1341 << inline_max_code_units;
1342 return false;
1343 }
1344
1345 if (code_item->tries_size_ != 0) {
1346 LOG_FAIL(kNotInlinedTryCatch)
1347 << "Method " << method->PrettyMethod() << " is not inlined because of try block";
1348 return false;
1349 }
1350
1351 if (!method->IsCompilable()) {
1352 LOG_FAIL(kNotInlinedNotVerified)
1353 << "Method " << method->PrettyMethod()
1354 << " has soft failures un-handled by the compiler, so it cannot be inlined";
    return false;
1355   }
1356
1357 if (!method->GetDeclaringClass()->IsVerified()) {
1358 uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
1359 if (Runtime::Current()->UseJitCompilation() ||
1360 !compiler_driver_->IsMethodVerifiedWithoutFailures(
1361 method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
1362 LOG_FAIL(kNotInlinedNotVerified)
1363 << "Method " << method->PrettyMethod()
1364 << " couldn't be verified, so it cannot be inlined";
1365 return false;
1366 }
1367 }
1368
1369 if (invoke_instruction->IsInvokeStaticOrDirect() &&
1370 invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
1371 // Case of a static method that cannot be inlined because it implicitly
1372 // requires an initialization check of its declaring class.
1373 LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
1374 << " is not inlined because it is static and requires a clinit"
1375 << " check that cannot be emitted due to Dex cache limitations";
1376 return false;
1377 }
1378
1379 if (!TryBuildAndInlineHelper(
1380 invoke_instruction, method, receiver_type, same_dex_file, return_replacement)) {
1381 return false;
1382 }
1383
1384 LOG_SUCCESS() << method->PrettyMethod();
1385 MaybeRecordStat(kInlinedInvoke);
1386 return true;
1387 }
1388
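// Maps the dex virtual-register index of an argument to the corresponding invoke
// input, accounting for wide (64-bit) arguments that occupy two vregs but only
// one input.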
1389 static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction,
1390 size_t arg_vreg_index)
1391 REQUIRES_SHARED(Locks::mutator_lock_) {
1392 size_t input_index = 0;
1393 for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) {
1394 DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments());
1395 if (Primitive::Is64BitType(invoke_instruction->InputAt(input_index)->GetType())) {
1396 ++i;
1397 DCHECK_NE(i, arg_vreg_index);
1398 }
1399 }
1400 DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments());
1401 return invoke_instruction->InputAt(input_index);
1402 }
1403
1404 // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
1405 bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
1406 ArtMethod* resolved_method,
1407 HInstruction** return_replacement) {
1408 InlineMethod inline_method;
1409 if (!InlineMethodAnalyser::AnalyseMethodCode(resolved_method, &inline_method)) {
1410 return false;
1411 }
1412
1413 switch (inline_method.opcode) {
1414 case kInlineOpNop:
1415 DCHECK_EQ(invoke_instruction->GetType(), Primitive::kPrimVoid);
1416 *return_replacement = nullptr;
1417 break;
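    // The callee simply returns one of its arguments; a typical example would be
    // something like "static int identity(int x) { return x; }" (illustrative).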
1418 case kInlineOpReturnArg:
1419 *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction,
1420 inline_method.d.return_data.arg);
1421 break;
1422 case kInlineOpNonWideConst:
1423 if (resolved_method->GetShorty()[0] == 'L') {
1424 DCHECK_EQ(inline_method.d.data, 0u);
1425 *return_replacement = graph_->GetNullConstant();
1426 } else {
1427 *return_replacement = graph_->GetIntConstant(static_cast<int32_t>(inline_method.d.data));
1428 }
1429 break;
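    // Simple getter pattern: the body is a single IGET on the receiver, e.g.
    // "int getX() { return x; }" (illustrative); the call is replaced by an
    // HInstanceFieldGet.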
1430 case kInlineOpIGet: {
1431 const InlineIGetIPutData& data = inline_method.d.ifield_data;
1432 if (data.method_is_static || data.object_arg != 0u) {
1433 // TODO: Needs null check.
1434 return false;
1435 }
1436 HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
1437 HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj);
1438 DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
1439 DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
1440 invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
1441 *return_replacement = iget;
1442 break;
1443 }
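    // Simple setter pattern: the body is a single IPUT on the receiver, e.g.
    // "void setX(int v) { x = v; }" (illustrative); the call is replaced by an
    // HInstanceFieldSet.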
1444 case kInlineOpIPut: {
1445 const InlineIGetIPutData& data = inline_method.d.ifield_data;
1446 if (data.method_is_static || data.object_arg != 0u) {
1447 // TODO: Needs null check.
1448 return false;
1449 }
1450 HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
1451 HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg);
1452 HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value);
1453 DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset);
1454 DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile);
1455 invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
1456 if (data.return_arg_plus1 != 0u) {
1457 size_t return_arg = data.return_arg_plus1 - 1u;
1458 *return_replacement = GetInvokeInputForArgVRegIndex(invoke_instruction, return_arg);
1459 }
1460 break;
1461 }
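    // Trivial constructor pattern: the body only stores (up to three of) its
    // arguments into fields of the new instance, e.g.
    // "Point(int x, int y) { this.x = x; this.y = y; }" (illustrative).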
1462 case kInlineOpConstructor: {
1463 const InlineConstructorData& data = inline_method.d.constructor_data;
1464       // Copy the field indexes and argument indexes into local arrays for easier processing.
1465 uint16_t iput_field_indexes[] = {
1466 data.iput0_field_index, data.iput1_field_index, data.iput2_field_index
1467 };
1468 uint16_t iput_args[] = { data.iput0_arg, data.iput1_arg, data.iput2_arg };
1469 static_assert(arraysize(iput_args) == arraysize(iput_field_indexes), "Size mismatch");
1470 // Count valid field indexes.
1471 size_t number_of_iputs = 0u;
1472 while (number_of_iputs != arraysize(iput_field_indexes) &&
1473 iput_field_indexes[number_of_iputs] != DexFile::kDexNoIndex16) {
1474 // Check that there are no duplicate valid field indexes.
1475 DCHECK_EQ(0, std::count(iput_field_indexes + number_of_iputs + 1,
1476 iput_field_indexes + arraysize(iput_field_indexes),
1477 iput_field_indexes[number_of_iputs]));
1478 ++number_of_iputs;
1479 }
1480 // Check that there are no valid field indexes in the rest of the array.
1481 DCHECK_EQ(0, std::count_if(iput_field_indexes + number_of_iputs,
1482 iput_field_indexes + arraysize(iput_field_indexes),
1483 [](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
1484
1485 // Create HInstanceFieldSet for each IPUT that stores non-zero data.
1486 HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
1487 bool needs_constructor_barrier = false;
1488 for (size_t i = 0; i != number_of_iputs; ++i) {
1489 HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
1490 if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
1491 uint16_t field_index = iput_field_indexes[i];
1492 bool is_final;
1493 HInstanceFieldSet* iput =
1494 CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final);
1495 invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
1496
1497 // Check whether the field is final. If it is, we need to add a barrier.
1498 if (is_final) {
1499 needs_constructor_barrier = true;
1500 }
1501 }
1502 }
1503 if (needs_constructor_barrier) {
1504 HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
1505 invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
1506 }
1507 *return_replacement = nullptr;
1508 break;
1509 }
1510 default:
1511 LOG(FATAL) << "UNREACHABLE";
1512 UNREACHABLE();
1513 }
1514 return true;
1515 }
1516
1517 HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
1518 ArtMethod* referrer,
1519 HInstruction* obj)
1520 REQUIRES_SHARED(Locks::mutator_lock_) {
1521 ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
1522 ArtField* resolved_field =
1523 class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
1524 DCHECK(resolved_field != nullptr);
1525 HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
1526 obj,
1527 resolved_field,
1528 resolved_field->GetTypeAsPrimitiveType(),
1529 resolved_field->GetOffset(),
1530 resolved_field->IsVolatile(),
1531 field_index,
1532 resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
1533 *referrer->GetDexFile(),
1534       // A read barrier generates a runtime call in the slow path, and we need a valid
1535 // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
1536 /* dex_pc */ 0);
1537 if (iget->GetType() == Primitive::kPrimNot) {
1538 // Use the same dex_cache that we used for field lookup as the hint_dex_cache.
1539 Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache());
1540 ReferenceTypePropagation rtp(graph_,
1541 outer_compilation_unit_.GetClassLoader(),
1542 dex_cache,
1543 handles_,
1544 /* is_first_run */ false);
1545 rtp.Visit(iget);
1546 }
1547 return iget;
1548 }
1549
1550 HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
1551 ArtMethod* referrer,
1552 HInstruction* obj,
1553 HInstruction* value,
1554 bool* is_final)
1555 REQUIRES_SHARED(Locks::mutator_lock_) {
1556 ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
1557 ArtField* resolved_field =
1558 class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
1559 DCHECK(resolved_field != nullptr);
1560 if (is_final != nullptr) {
1561 // This information is needed only for constructors.
1562 DCHECK(referrer->IsConstructor());
1563 *is_final = resolved_field->IsFinal();
1564 }
1565 HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
1566 obj,
1567 value,
1568 resolved_field,
1569 resolved_field->GetTypeAsPrimitiveType(),
1570 resolved_field->GetOffset(),
1571 resolved_field->IsVolatile(),
1572 field_index,
1573 resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
1574 *referrer->GetDexFile(),
1575       // A read barrier generates a runtime call in the slow path, and we need a valid
1576 // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
1577 /* dex_pc */ 0);
1578 return iput;
1579 }
1580
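// Creates a new handle only when `object` differs from the hinted handle, so the
// handle scope is not grown with duplicate entries in the common case where the
// callee shares the caller's dex cache or class loader.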
1581 template <typename T>
1582 static inline Handle<T> NewHandleIfDifferent(T* object,
1583 Handle<T> hint,
1584 VariableSizedHandleScope* handles)
1585 REQUIRES_SHARED(Locks::mutator_lock_) {
1586 return (object != hint.Get()) ? handles->NewHandle(object) : hint;
1587 }
1588
1589 bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
1590 ArtMethod* resolved_method,
1591 ReferenceTypeInfo receiver_type,
1592 bool same_dex_file,
1593 HInstruction** return_replacement) {
1594 DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
1595 ScopedObjectAccess soa(Thread::Current());
1596 const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
1597 const DexFile& callee_dex_file = *resolved_method->GetDexFile();
1598 uint32_t method_index = resolved_method->GetDexMethodIndex();
1599 ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
1600 Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(),
1601 caller_compilation_unit_.GetDexCache(),
1602 handles_);
1603 Handle<mirror::ClassLoader> class_loader =
1604 NewHandleIfDifferent(resolved_method->GetDeclaringClass()->GetClassLoader(),
1605 caller_compilation_unit_.GetClassLoader(),
1606 handles_);
1607
1608 DexCompilationUnit dex_compilation_unit(
1609 class_loader,
1610 class_linker,
1611 callee_dex_file,
1612 code_item,
1613 resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
1614 method_index,
1615 resolved_method->GetAccessFlags(),
1616 /* verified_method */ nullptr,
1617 dex_cache);
1618
1619 InvokeType invoke_type = invoke_instruction->GetInvokeType();
1620 if (invoke_type == kInterface) {
1621 // We have statically resolved the dispatch. To please the class linker
1622     // at runtime, we rewrite this call as a virtual call.
1623 invoke_type = kVirtual;
1624 }
1625
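  // Build the callee graph with instruction ids starting at the caller's current
  // id, so ids remain unique when the callee is later merged into the caller.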
1626 const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId();
1627 HGraph* callee_graph = new (graph_->GetArena()) HGraph(
1628 graph_->GetArena(),
1629 callee_dex_file,
1630 method_index,
1631 compiler_driver_->GetInstructionSet(),
1632 invoke_type,
1633 graph_->IsDebuggable(),
1634 /* osr */ false,
1635 caller_instruction_counter);
1636 callee_graph->SetArtMethod(resolved_method);
1637
1638   // When needed, allocate `inline_stats_` on the Arena instead of on the
1639   // stack, as Clang might otherwise produce a stack frame too large for
1640   // this function, one that would exceed the limit set by the
1641   // `-Wframe-larger-than` option.
1642 if (stats_ != nullptr) {
1643 // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
1644 if (inline_stats_ == nullptr) {
1645 void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
1646 inline_stats_ = new (storage) OptimizingCompilerStats;
1647 } else {
1648 inline_stats_->Reset();
1649 }
1650 }
1651 HGraphBuilder builder(callee_graph,
1652 &dex_compilation_unit,
1653 &outer_compilation_unit_,
1654 resolved_method->GetDexFile(),
1655 *code_item,
1656 compiler_driver_,
1657 codegen_,
1658 inline_stats_,
1659 resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
1660 dex_cache,
1661 handles_);
1662
1663 if (builder.BuildGraph() != kAnalysisSuccess) {
1664 LOG_FAIL(kNotInlinedCannotBuild)
1665 << "Method " << callee_dex_file.PrettyMethod(method_index)
1666 << " could not be built, so cannot be inlined";
1667 return false;
1668 }
1669
1670 if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
1671 compiler_driver_->GetInstructionSet())) {
1672 LOG_FAIL(kNotInlinedRegisterAllocator)
1673 << "Method " << callee_dex_file.PrettyMethod(method_index)
1674 << " cannot be inlined because of the register allocator";
1675 return false;
1676 }
1677
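  // Substitute the callee's HParameterValue instructions with the invoke's actual
  // arguments: constants are propagated directly, and reference arguments carry
  // over their (possibly more precise) type information and nullability.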
1678 size_t parameter_index = 0;
1679 bool run_rtp = false;
1680 for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions());
1681 !instructions.Done();
1682 instructions.Advance()) {
1683 HInstruction* current = instructions.Current();
1684 if (current->IsParameterValue()) {
1685 HInstruction* argument = invoke_instruction->InputAt(parameter_index);
1686 if (argument->IsNullConstant()) {
1687 current->ReplaceWith(callee_graph->GetNullConstant());
1688 } else if (argument->IsIntConstant()) {
1689 current->ReplaceWith(callee_graph->GetIntConstant(argument->AsIntConstant()->GetValue()));
1690 } else if (argument->IsLongConstant()) {
1691 current->ReplaceWith(callee_graph->GetLongConstant(argument->AsLongConstant()->GetValue()));
1692 } else if (argument->IsFloatConstant()) {
1693 current->ReplaceWith(
1694 callee_graph->GetFloatConstant(argument->AsFloatConstant()->GetValue()));
1695 } else if (argument->IsDoubleConstant()) {
1696 current->ReplaceWith(
1697 callee_graph->GetDoubleConstant(argument->AsDoubleConstant()->GetValue()));
1698 } else if (argument->GetType() == Primitive::kPrimNot) {
1699 if (!resolved_method->IsStatic() && parameter_index == 0 && receiver_type.IsValid()) {
1700 run_rtp = true;
1701 current->SetReferenceTypeInfo(receiver_type);
1702 } else {
1703 current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo());
1704 }
1705 current->AsParameterValue()->SetCanBeNull(argument->CanBeNull());
1706 }
1707 ++parameter_index;
1708 }
1709 }
1710
1711 // We have replaced formal arguments with actual arguments. If actual types
1712 // are more specific than the declared ones, run RTP again on the inner graph.
1713 if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
1714 ReferenceTypePropagation(callee_graph,
1715 outer_compilation_unit_.GetClassLoader(),
1716 dex_compilation_unit.GetDexCache(),
1717 handles_,
1718 /* is_first_run */ false).Run();
1719 }
1720
1721 RunOptimizations(callee_graph, code_item, dex_compilation_unit);
1722
1723 HBasicBlock* exit_block = callee_graph->GetExitBlock();
1724 if (exit_block == nullptr) {
1725 LOG_FAIL(kNotInlinedInfiniteLoop)
1726 << "Method " << callee_dex_file.PrettyMethod(method_index)
1727 << " could not be inlined because it has an infinite loop";
1728 return false;
1729 }
1730
1731 bool has_one_return = false;
1732 for (HBasicBlock* predecessor : exit_block->GetPredecessors()) {
1733 if (predecessor->GetLastInstruction()->IsThrow()) {
1734 if (invoke_instruction->GetBlock()->IsTryBlock()) {
1735           // TODO(ngeoffray): Support adding HTryBoundary in HGraph::InlineInto.
1736 LOG_FAIL(kNotInlinedTryCatch)
1737 << "Method " << callee_dex_file.PrettyMethod(method_index)
1738 << " could not be inlined because one branch always throws and"
1739 << " caller is in a try/catch block";
1740 return false;
1741 } else if (graph_->GetExitBlock() == nullptr) {
1742 // TODO(ngeoffray): Support adding HExit in the caller graph.
1743 LOG_FAIL(kNotInlinedInfiniteLoop)
1744 << "Method " << callee_dex_file.PrettyMethod(method_index)
1745 << " could not be inlined because one branch always throws and"
1746 << " caller does not have an exit block";
1747 return false;
1748 } else if (graph_->HasIrreducibleLoops()) {
1749           // TODO(ngeoffray): Support re-computing loop information for graphs with
1750 // irreducible loops?
1751 VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
1752 << " could not be inlined because one branch always throws and"
1753 << " caller has irreducible loops";
1754 return false;
1755 }
1756 } else {
1757 has_one_return = true;
1758 }
1759 }
1760
1761 if (!has_one_return) {
1762 LOG_FAIL(kNotInlinedAlwaysThrows)
1763 << "Method " << callee_dex_file.PrettyMethod(method_index)
1764 << " could not be inlined because it always throws";
1765 return false;
1766 }
1767
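  // Walk the optimized callee graph and reject the inlining attempt if it contains
  // constructs the inliner cannot handle (irreducible loops, loops with no exit,
  // unresolved field accesses, ...) or if it would exceed the instruction or
  // environment budgets.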
1768 size_t number_of_instructions = 0;
1769   // Skip the entry block; it does not contain instructions that prevent inlining.
1770 for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
1771 if (block->IsLoopHeader()) {
1772 if (block->GetLoopInformation()->IsIrreducible()) {
1773         // Don't inline methods with irreducible loops, as they could prevent some
1774         // optimizations from running.
1775 LOG_FAIL(kNotInlinedIrreducibleLoop)
1776 << "Method " << callee_dex_file.PrettyMethod(method_index)
1777 << " could not be inlined because it contains an irreducible loop";
1778 return false;
1779 }
1780 if (!block->GetLoopInformation()->HasExitEdge()) {
1781         // Don't inline methods with loops that have no exit, since such loops cause
1782         // the loop information to be computed incorrectly when it is updated after
1783         // inlining.
1784 LOG_FAIL(kNotInlinedLoopWithoutExit)
1785 << "Method " << callee_dex_file.PrettyMethod(method_index)
1786 << " could not be inlined because it contains a loop with no exit";
1787 return false;
1788 }
1789 }
1790
1791 for (HInstructionIterator instr_it(block->GetInstructions());
1792 !instr_it.Done();
1793 instr_it.Advance()) {
1794 if (++number_of_instructions >= inlining_budget_) {
1795 LOG_FAIL(kNotInlinedInstructionBudget)
1796 << "Method " << callee_dex_file.PrettyMethod(method_index)
1797 << " is not inlined because the outer method has reached"
1798 << " its instruction budget limit.";
1799 return false;
1800 }
1801 HInstruction* current = instr_it.Current();
1802 if (current->NeedsEnvironment() &&
1803 (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
1804 LOG_FAIL(kNotInlinedEnvironmentBudget)
1805 << "Method " << callee_dex_file.PrettyMethod(method_index)
1806 << " is not inlined because its caller has reached"
1807 << " its environment budget limit.";
1808 return false;
1809 }
1810
1811 if (current->NeedsEnvironment() &&
1812 !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
1813 resolved_method)) {
1814 LOG_FAIL(kNotInlinedStackMaps)
1815 << "Method " << callee_dex_file.PrettyMethod(method_index)
1816 << " could not be inlined because " << current->DebugName()
1817 << " needs an environment, is in a different dex file"
1818 << ", and cannot be encoded in the stack maps.";
1819 return false;
1820 }
1821
1822 if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
1823 LOG_FAIL(kNotInlinedDexCache)
1824 << "Method " << callee_dex_file.PrettyMethod(method_index)
1825 << " could not be inlined because " << current->DebugName()
1826             << " is in a different dex file and requires access to the dex cache";
1827 return false;
1828 }
1829
1830 if (current->IsUnresolvedStaticFieldGet() ||
1831 current->IsUnresolvedInstanceFieldGet() ||
1832 current->IsUnresolvedStaticFieldSet() ||
1833 current->IsUnresolvedInstanceFieldSet()) {
1834 // Entrypoint for unresolved fields does not handle inlined frames.
1835 LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
1836 << "Method " << callee_dex_file.PrettyMethod(method_index)
1837 << " could not be inlined because it is using an unresolved"
1838 << " entrypoint";
1839 return false;
1840 }
1841 }
1842 }
1843 DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
1844 << "No instructions can be added to the outer graph while inner graph is being built";
1845
1846 // Inline the callee graph inside the caller graph.
1847 const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
1848 graph_->SetCurrentInstructionId(callee_instruction_counter);
1849 *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
1850 // Update our budget for other inlining attempts in `caller_graph`.
1851 total_number_of_instructions_ += number_of_instructions;
1852 UpdateInliningBudget();
1853
1854 DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
1855 << "No instructions can be added to the inner graph during inlining into the outer graph";
1856
1857 if (stats_ != nullptr) {
1858 DCHECK(inline_stats_ != nullptr);
1859 inline_stats_->AddTo(stats_);
1860 }
1861
1862 return true;
1863 }
1864
1865 void HInliner::RunOptimizations(HGraph* callee_graph,
1866 const DexFile::CodeItem* code_item,
1867 const DexCompilationUnit& dex_compilation_unit) {
1868   // Note: if the outermost_graph_ is being compiled with OSR, we should not run any
1869 // optimization that could lead to a HDeoptimize. The following optimizations do not.
1870 HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
1871 HConstantFolding fold(callee_graph, "constant_folding$inliner");
1872 HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
1873 InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
1874 IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
1875
1876 HOptimization* optimizations[] = {
1877 &intrinsics,
1878 &sharpening,
1879 &simplify,
1880 &fold,
1881 &dce,
1882 };
1883
1884 for (size_t i = 0; i < arraysize(optimizations); ++i) {
1885 HOptimization* optimization = optimizations[i];
1886 optimization->Run();
1887 }
1888
1889   // Bail early for pathological cases involving the environment (for example recursive
1890   // calls or an overly large environment).
1891 if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) {
1892 LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
1893 << " will not be inlined because the outer method has reached"
1894 << " its environment budget limit.";
1895 return;
1896 }
1897
1898 // Bail early if we know we already are over the limit.
1899 size_t number_of_instructions = CountNumberOfInstructions(callee_graph);
1900 if (number_of_instructions > inlining_budget_) {
1901 LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
1902 << " will not be inlined because the outer method has reached"
1903 << " its instruction budget limit. " << number_of_instructions;
1904 return;
1905 }
1906
1907 HInliner inliner(callee_graph,
1908 outermost_graph_,
1909 codegen_,
1910 outer_compilation_unit_,
1911 dex_compilation_unit,
1912 compiler_driver_,
1913 handles_,
1914 inline_stats_,
1915 total_number_of_dex_registers_ + code_item->registers_size_,
1916 total_number_of_instructions_ + number_of_instructions,
1917 this,
1918 depth_ + 1);
1919 inliner.Run();
1920 }
1921
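// Returns true when the actual object's type information is strictly more precise
// than the declared one: known non-null where the declaration allows null, exact
// where the declaration is inexact, or a strict subtype of the declared class.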
1922 static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
1923 bool declared_can_be_null,
1924 HInstruction* actual_obj)
1925 REQUIRES_SHARED(Locks::mutator_lock_) {
1926 if (declared_can_be_null && !actual_obj->CanBeNull()) {
1927 return true;
1928 }
1929
1930 ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
1931 return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
1932 declared_rti.IsStrictSupertypeOf(actual_rti);
1933 }
1934
1935 ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
1936 return ReferenceTypePropagation::IsAdmissible(klass)
1937 ? ReferenceTypeInfo::Create(handles_->NewHandle(klass))
1938 : graph_->GetInexactObjectRti();
1939 }
1940
1941 bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) {
1942 // If this is an instance call, test whether the type of the `this` argument
1943 // is more specific than the class which declares the method.
1944 if (!resolved_method->IsStatic()) {
1945 if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
1946 /* declared_can_be_null */ false,
1947 invoke_instruction->InputAt(0u))) {
1948 return true;
1949 }
1950 }
1951
1952 // Iterate over the list of parameter types and test whether any of the
1953 // actual inputs has a more specific reference type than the type declared in
1954 // the signature.
1955 const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
1956 for (size_t param_idx = 0,
1957 input_idx = resolved_method->IsStatic() ? 0 : 1,
1958 e = (param_list == nullptr ? 0 : param_list->Size());
1959 param_idx < e;
1960 ++param_idx, ++input_idx) {
1961 HInstruction* input = invoke_instruction->InputAt(input_idx);
1962 if (input->GetType() == Primitive::kPrimNot) {
1963 mirror::Class* param_cls = resolved_method->GetClassFromTypeIndex(
1964 param_list->GetTypeItem(param_idx).type_idx_,
1965 /* resolve */ false);
1966 if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
1967 /* declared_can_be_null */ true,
1968 input)) {
1969 return true;
1970 }
1971 }
1972 }
1973
1974 return false;
1975 }
1976
1977 bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
1978 HInstruction* return_replacement) {
1979 // Check the integrity of reference types and run another type propagation if needed.
1980 if (return_replacement != nullptr) {
1981 if (return_replacement->GetType() == Primitive::kPrimNot) {
1982 // Test if the return type is a refinement of the declared return type.
1983 if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
1984 /* declared_can_be_null */ true,
1985 return_replacement)) {
1986 return true;
1987 } else if (return_replacement->IsInstanceFieldGet()) {
1988 HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet();
1989 ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
1990 if (field_get->GetFieldInfo().GetField() ==
1991 class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) {
1992 return true;
1993 }
1994 }
1995 } else if (return_replacement->IsInstanceOf()) {
1996 // Inlining InstanceOf into an If may put a tighter bound on reference types.
1997 return true;
1998 }
1999 }
2000
2001 return false;
2002 }
2003
2004 void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
2005 HInstruction* return_replacement) {
2006 if (return_replacement != nullptr) {
2007 if (return_replacement->GetType() == Primitive::kPrimNot) {
2008 if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
2009 // Make sure that we have a valid type for the return. We may get an invalid one when
2010 // we inline invokes with multiple branches and create a Phi for the result.
2011 // TODO: we could be more precise by merging the phi inputs but that requires
2012 // some functionality from the reference type propagation.
2013 DCHECK(return_replacement->IsPhi());
2014 mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */);
2015 return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
2016 }
2017 }
2018 }
2019 }
2020
2021 } // namespace art
2022