// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/js-inlining-heuristic.h"

#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/js-heap-broker.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/simplified-operator.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {
namespace compiler {

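// TRACE(...) emits a single diagnostic line to stdout when tracing of
// inlining decisions is enabled via FLAG_trace_turbo_inlining.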
#define TRACE(...)                                                             \
  do {                                                                         \
    if (FLAG_trace_turbo_inlining) StdoutStream{} << __VA_ARGS__ << std::endl; \
  } while (false)

namespace {
bool IsSmall(int const size) {
  return size <= FLAG_max_inlined_bytecode_size_small;
}

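// Determines whether the function behind {feedback_cell} may be considered
// for inlining: it must have a feedback vector that remains stable across
// this check, a bytecode array, and inlining must not be disabled for it.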
bool CanConsiderForInlining(JSHeapBroker* broker,
                            FeedbackCellRef const& feedback_cell) {
  base::Optional<FeedbackVectorRef> feedback_vector =
      feedback_cell.feedback_vector();
  if (!feedback_vector.has_value()) {
    TRACE("Cannot consider " << feedback_cell
                             << " for inlining (no feedback vector)");
    return false;
  }
  SharedFunctionInfoRef shared = feedback_vector->shared_function_info();

  if (!shared.HasBytecodeArray()) {
    TRACE("Cannot consider " << shared << " for inlining (no bytecode)");
    return false;
  }
  // Ensure we have a persistent handle to the bytecode in order to avoid
  // flushing it during the remaining compilation.
  shared.GetBytecodeArray();

  // Read feedback vector again in case it got flushed before we were able to
  // prevent flushing above.
  base::Optional<FeedbackVectorRef> feedback_vector_again =
      feedback_cell.feedback_vector();
  if (!feedback_vector_again.has_value()) {
    TRACE("Cannot consider " << shared << " for inlining (no feedback vector)");
    return false;
  }
  if (!feedback_vector_again->equals(*feedback_vector)) {
    // The new feedback vector likely contains lots of uninitialized slots, so
    // it doesn't make much sense to inline this function now.
    TRACE("Not considering " << shared
                             << " for inlining (feedback vector changed)");
    return false;
  }

  SharedFunctionInfo::Inlineability inlineability = shared.GetInlineability();
  if (inlineability != SharedFunctionInfo::kIsInlineable) {
    TRACE("Cannot consider "
          << shared << " for inlining (reason: " << inlineability << ")");
    return false;
  }

  TRACE("Considering " << shared << " for inlining with " << *feedback_vector);
  return true;
}

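// Overload for a concrete JSFunction: fetches its feedback cell and defers to
// the check above, additionally verifying that the cell's SharedFunctionInfo
// matches the function's.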
bool CanConsiderForInlining(JSHeapBroker* broker,
                            JSFunctionRef const& function) {
  FeedbackCellRef feedback_cell =
      function.raw_feedback_cell(broker->dependencies());
  bool const result = CanConsiderForInlining(broker, feedback_cell);
  if (result) {
    CHECK(
        function.shared().equals(feedback_cell.shared_function_info().value()));
  }
  return result;
}

}  // namespace

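// Collects the inlining candidate(s) reachable through the callee input of
// {node}. Four callee shapes are recognized: a constant JSFunction, a Phi of
// up to {functions_size} constant JSFunctions (a polymorphic call site), a
// CheckClosure node, and a JSCreateClosure node.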
JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
    Node* node, int functions_size) {
  DCHECK_NE(0, functions_size);
  Node* callee = node->InputAt(0);
  Candidate out;
  out.node = node;

  HeapObjectMatcher m(callee);
  if (m.HasResolvedValue() && m.Ref(broker()).IsJSFunction()) {
    JSFunctionRef function = m.Ref(broker()).AsJSFunction();
    out.functions[0] = function;
    if (CanConsiderForInlining(broker(), function)) {
      out.bytecode[0] = function.shared().GetBytecodeArray();
      out.num_functions = 1;
      return out;
    }
  }
  if (m.IsPhi()) {
    int const value_input_count = m.node()->op()->ValueInputCount();
    if (value_input_count > functions_size) {
      out.num_functions = 0;
      return out;
    }
    for (int n = 0; n < value_input_count; ++n) {
      HeapObjectMatcher m2(callee->InputAt(n));
      if (!m2.HasResolvedValue() || !m2.Ref(broker()).IsJSFunction()) {
        out.num_functions = 0;
        return out;
      }

      out.functions[n] = m2.Ref(broker()).AsJSFunction();
      JSFunctionRef function = out.functions[n].value();
      if (CanConsiderForInlining(broker(), function)) {
        out.bytecode[n] = function.shared().GetBytecodeArray();
      }
    }
    out.num_functions = value_input_count;
    return out;
  }
  if (m.IsCheckClosure()) {
    DCHECK(!out.functions[0].has_value());
    FeedbackCellRef feedback_cell = MakeRef(broker(), FeedbackCellOf(m.op()));
    if (CanConsiderForInlining(broker(), feedback_cell)) {
      out.shared_info = feedback_cell.shared_function_info().value();
      out.bytecode[0] = out.shared_info->GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  if (m.IsJSCreateClosure()) {
    DCHECK(!out.functions[0].has_value());
    JSCreateClosureNode n(callee);
    FeedbackCellRef feedback_cell = n.GetFeedbackCellRefChecked(broker());
    if (CanConsiderForInlining(broker(), feedback_cell)) {
      out.shared_info = feedback_cell.shared_function_info().value();
      out.bytecode[0] = out.shared_info->GetBytecodeArray();
      CHECK(out.shared_info->equals(n.Parameters().shared_info(broker())));
    }
    out.num_functions = 1;
    return out;
  }
  out.num_functions = 0;
  return out;
}

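// Main entry point of the reducer: for each eligible call site, either inline
// immediately (when all targets are small), record the call site as a
// candidate for Finalize(), or bail out.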
Reduction JSInliningHeuristic::Reduce(Node* node) {
#if V8_ENABLE_WEBASSEMBLY
  if (mode() == kWasmOnly) {
    if (node->opcode() == IrOpcode::kJSWasmCall) {
      return inliner_.ReduceJSWasmCall(node);
    }
    return NoChange();
  }
#endif  // V8_ENABLE_WEBASSEMBLY

  DCHECK_EQ(mode(), kJSOnly);
  if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();

  if (total_inlined_bytecode_size_ >= max_inlined_bytecode_size_absolute_) {
    return NoChange();
  }
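  // Note that the absolute limit above bounds all inlining done by this
  // reducer, including the forced inlining of small functions below; the
  // cumulative limit, in contrast, may be exceeded for small functions (see
  // InlineCandidate()).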

  // Check if we have already seen {node}, and if so, just skip it.
  if (seen_.find(node->id()) != seen_.end()) return NoChange();

  // Check if the {node} is an appropriate candidate for inlining.
  Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
  if (candidate.num_functions == 0) {
    return NoChange();
  } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) {
    TRACE("Not considering call site #"
          << node->id() << ":" << node->op()->mnemonic()
          << ", because polymorphic inlining is disabled");
    return NoChange();
  }

  bool can_inline_candidate = false, candidate_is_small = true;
  candidate.total_size = 0;
  FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
  FrameStateInfo const& frame_info = frame_state.frame_state_info();
  Handle<SharedFunctionInfo> frame_shared_info;
  for (int i = 0; i < candidate.num_functions; ++i) {
    if (!candidate.bytecode[i].has_value()) {
      candidate.can_inline_function[i] = false;
      continue;
    }

    SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                       ? candidate.functions[i].value().shared()
                                       : candidate.shared_info.value();
    candidate.can_inline_function[i] = candidate.bytecode[i].has_value();
    // Because optimization runs concurrently, optimization of the inlining
    // candidate could have been disabled in the meantime.
    // JSInliner will check this again and not actually inline the function in
    // this case.
    CHECK_IMPLIES(candidate.can_inline_function[i],
                  shared.IsInlineable() ||
                      shared.GetInlineability() ==
                          SharedFunctionInfo::kHasOptimizationDisabled);
    // Do not allow direct recursion, i.e. f() -> f(). We still allow indirect
    // recursion like f() -> g() -> f(), which is helpful in cases where f()
    // is a small dispatch function that calls the appropriate target. For
    // direct recursion we only have static information for the first level
    // of inlining, so it may not be that useful to inline just one level of
    // a recursive call. Some cases like tail recursion might benefit from
    // recursive inlining, given additional analysis that converts them to
    // iterative implementations, though it is not obvious whether such an
    // analysis is needed.
    if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
        frame_shared_info.equals(shared.object())) {
      TRACE("Not considering call site #" << node->id() << ":"
                                          << node->op()->mnemonic()
                                          << ", because of recursive inlining");
      candidate.can_inline_function[i] = false;
    }
    if (candidate.can_inline_function[i]) {
      can_inline_candidate = true;
      BytecodeArrayRef bytecode = candidate.bytecode[i].value();
      candidate.total_size += bytecode.length();
      unsigned inlined_bytecode_size = 0;
      if (candidate.functions[i].has_value()) {
        JSFunctionRef function = candidate.functions[i].value();
        inlined_bytecode_size = function.code().GetInlinedBytecodeSize();
        candidate.total_size += inlined_bytecode_size;
      }
      candidate_is_small = candidate_is_small &&
                           IsSmall(bytecode.length() + inlined_bytecode_size);
    }
  }
  if (!can_inline_candidate) return NoChange();

  // Gather feedback on how often this call site has been hit before.
  if (node->opcode() == IrOpcode::kJSCall) {
    CallParameters const p = CallParametersOf(node->op());
    candidate.frequency = p.frequency();
  } else {
    ConstructParameters const p = ConstructParametersOf(node->op());
    candidate.frequency = p.frequency();
  }

  // Don't consider a {candidate} whose frequency is below the
  // threshold, i.e. a call site that is only hit once every N
  // invocations of the caller.
  if (candidate.frequency.IsKnown() &&
      candidate.frequency.value() < FLAG_min_inlining_frequency) {
    return NoChange();
  }

  // Found a candidate. Insert it into the set of seen nodes so that we don't
  // revisit it in the future. Note that this insertion happens here and not
  // earlier in order to make inlining decisions order-independent: a node may
  // not be a candidate when first seen, but later reductions may turn it into
  // a valid candidate, in which case it should be revisited by
  // JSInliningHeuristic.
  seen_.insert(node->id());

  // Forcibly inline small functions here. In the case of polymorphic inlining,
  // candidate_is_small is set only when all functions are small.
  if (candidate_is_small) {
    TRACE("Inlining small function(s) at call site #"
          << node->id() << ":" << node->op()->mnemonic());
    return InlineCandidate(candidate, true);
  }

  // In the general case we remember the candidate for later.
  candidates_.insert(candidate);
  return NoChange();
}

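// Called once the reduction fixpoint is reached: spends the remaining budget
// on the deferred candidates, hottest first, inlining at most one candidate
// per fixpoint iteration (see the comment in the loop below).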
void JSInliningHeuristic::Finalize() {
  if (candidates_.empty()) return;  // Nothing to do without candidates.
  if (FLAG_trace_turbo_inlining) PrintCandidates();

  // We inline at most one candidate in every iteration of the fixpoint.
  // This is to ensure that we don't consume the full inlining budget
  // on things that aren't called very often.
  // TODO(bmeurer): Use std::priority_queue instead of std::set here.
  while (!candidates_.empty()) {
    auto i = candidates_.begin();
    Candidate candidate = *i;
    candidates_.erase(i);

    // Ignore this candidate if it's no longer valid.
    if (!IrOpcode::IsInlineeOpcode(candidate.node->opcode())) continue;
    if (candidate.node->IsDead()) continue;

    // Make sure we have some extra budget left, so that any small functions
    // exposed by this function would be given a chance to inline.
    double size_of_candidate =
        candidate.total_size * FLAG_reserve_inline_budget_scale_factor;
    int total_size =
        total_inlined_bytecode_size_ + static_cast<int>(size_of_candidate);
    if (total_size > max_inlined_bytecode_size_cumulative_) {
      // See if any smaller functions are available to inline.
      continue;
    }

    Reduction const reduction = InlineCandidate(candidate, false);
    if (reduction.Changed()) return;
  }
}

namespace {

struct NodeAndIndex {
  Node* node;
  int index;
};

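// Collects into {uses_buffer} every input slot of {state_values} (including
// nested StateValues owned exclusively by this chain) that refers to {node}.
// Returns false if more than {max_uses} uses would be recorded; StateValues
// shared with other users are deliberately skipped.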
bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (state_values->UseCount() > 1) return true;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    if (input->opcode() == IrOpcode::kStateValues) {
      if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
                                       max_uses)) {
        return false;
      }
    } else if (input == node) {
      if (*use_count >= max_uses) return false;
      uses_buffer[*use_count] = {state_values, i};
      (*use_count)++;
    }
  }
  return true;
}

}  // namespace

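// Returns a version of {state_values} in which every use of {from} is
// replaced by {to}. With kCloneState a copy is made lazily and the original
// is left untouched; with kChangeInPlace the node is mutated directly.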
Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
                                                         Node* from, Node* to,
                                                         StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectStateValuesOwnedUses}.
  if (state_values->UseCount() > 1) return state_values;
  Node* copy = mode == kChangeInPlace ? state_values : nullptr;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    Node* processed;
    if (input->opcode() == IrOpcode::kStateValues) {
      processed = DuplicateStateValuesAndRename(input, from, to, mode);
    } else if (input == from) {
      processed = to;
    } else {
      processed = input;
    }
    if (processed != input) {
      if (!copy) {
        copy = graph()->CloneNode(state_values);
      }
      copy->ReplaceInput(i, processed);
    }
  }
  return copy ? copy : state_values;
}

namespace {

bool CollectFrameStateUniqueUses(Node* node, FrameState frame_state,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (frame_state->UseCount() > 1) return true;
  if (frame_state.stack() == node) {
    if (*use_count >= max_uses) return false;
    uses_buffer[*use_count] = {frame_state, FrameState::kFrameStateStackInput};
    (*use_count)++;
  }
  if (!CollectStateValuesOwnedUses(node, frame_state.locals(), uses_buffer,
                                   use_count, max_uses)) {
    return false;
  }
  return true;
}

}  // namespace

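// The FrameState analogue of DuplicateStateValuesAndRename: renames {from}
// to {to} in the frame state's stack input and in its exclusively owned
// locals StateValues.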
FrameState JSInliningHeuristic::DuplicateFrameStateAndRename(
    FrameState frame_state, Node* from, Node* to, StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy =
      mode == kChangeInPlace ? static_cast<Node*>(frame_state) : nullptr;
  if (frame_state.stack() == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(FrameState::kFrameStateStackInput, to);
  }
  Node* locals = frame_state.locals();
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(FrameState::kFrameStateLocalsInput, new_locals);
  }
  return copy != nullptr ? FrameState{copy} : frame_state;
}

bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  // We are trying to match the following pattern:
  //
  //         C1     C2
  //          .     .
  //          |     |
  //         Merge(merge)  <-----------------+
  //           ^    ^                        |
  //  V1  V2   |    |         E1  E2         |
  //   .  .    |    +----+     .  .          |
  //   |  |    |         |     |  |          |
  //  Phi(callee)      EffectPhi(effect_phi) |
  //     ^                    ^              |
  //     |                    |              |
  //     +----+               |              |
  //     |    |               |              |
  //     |   StateValues      |              |
  //     |       ^            |              |
  //     +----+  |            |              |
  //     |    |  |            |              |
  //     |    FrameState      |              |
  //     |           ^        |              |
  //     |           |        |          +---+
  //     |           |        |          |   |
  //     +----+     Checkpoint(checkpoint)   |
  //     |    |           ^                  |
  //     |    StateValues |    +-------------+
  //     |        |       |    |
  //     +-----+  |       |    |
  //     |     |  |       |    |
  //     |     FrameState |    |
  //     |             ^  |    |
  //     +-----------+ |  |    |
  //                  Call(node)
  //                   |
  //                   |
  //                   .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  //     V1              E1   C1  V2   E2               C2
  //     .                .    .  .    .                .
  //     |                |    |  |    |                |
  //     +----+           |    |  +----+                |
  //     |    |           |    |  |    |                |
  //     |   StateValues  |    |  |   StateValues       |
  //     |       ^        |    |  |       ^             |
  //     +----+  |        |    |  +----+  |             |
  //     |    |  |        |    |  |    |  |             |
  //     |    FrameState  |    |  |    FrameState       |
  //     |           ^    |    |  |           ^         |
  //     |           |    |    |  |           |         |
  //     |           |    |    |  |           |         |
  //     +----+     Checkpoint |  +----+     Checkpoint |
  //     |    |           ^    |  |    |           ^    |
  //     |    StateValues |    |  |    StateValues |    |
  //     |        |       |    |  |        |       |    |
  //     +-----+  |       |    |  +-----+  |       |    |
  //     |     |  |       |    |  |     |  |       |    |
  //     |     FrameState |    |  |     FrameState |    |
  //     |              ^ |    |  |              ^ |    |
  //     +-------------+| |    |  +-------------+| |    |
  //                   Call----+                Call----+
  //                     |                       |
  //                     +-------+  +------------+
  //                             |  |
  //                             Merge
  //                             EffectPhi
  //                             Phi
  //                              |
  //                             ...

  // Bail out if the call is not polymorphic anymore (other reducers might
  // have replaced the callee phi with a constant).
  if (callee->opcode() != IrOpcode::kPhi) return false;
  int const num_calls = callee->op()->ValueInputCount();

  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must only be used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With a CFG, this would be much easier and more robust - we would
  // just duplicate all the nodes between the merge and the call, replacing
  // all occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, FrameState{checkpoint_state},
                                     replaceable_uses, &replaceable_uses_count,
                                     kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Cases 2 and 3 - used in checkpoint and/or lazy deopt frame states.
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  // Clone the call and the frame state, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect_phi_effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint.
      FrameState new_checkpoint_state = DuplicateFrameStateAndRename(
          FrameState{checkpoint_state}, callee, target,
          (i == num_calls - 1) ? kChangeInPlace : kCloneState);
      effect_phi_effect = graph()->NewNode(
          checkpoint->op(), new_checkpoint_state, effect_phi_effect, control);
    }

    // Duplicate the call.
    FrameState new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect_phi_effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
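
  // Note that the last iteration above passed kChangeInPlace, so the original
  // checkpoint and lazy frame states were reused (mutated in place) for the
  // final branch, while earlier branches received fresh clones.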

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}

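// Builds the dispatch for a polymorphic call site: first tries to reuse the
// existing control flow via TryReuseDispatch(); failing that, emits a chain
// of ReferenceEqual checks and branches, one per known target, and clones the
// call into each branch.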
void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
                                                Candidate const& candidate,
                                                Node** if_successes,
                                                Node** calls, Node** inputs,
                                                int input_count) {
  SourcePositionTable::Scope position(
      source_positions_, source_positions_->GetSourcePosition(node));
  if (TryReuseDispatch(node, callee, if_successes, calls, inputs,
                       input_count)) {
    return;
  }

  STATIC_ASSERT(JSCallOrConstructNode::kHaveIdenticalLayouts);

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  int const num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
  for (int i = 0; i < num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target = jsgraph()->Constant(candidate.functions[i].value());
    if (i != (num_calls - 1)) {
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct) {
      // TODO(jgruber, v8:10675): This branch seems unreachable.
      JSConstructNode n(node);
      if (inputs[n.TargetIndex()] == inputs[n.NewTargetIndex()]) {
        inputs[n.NewTargetIndex()] = target;
      }
    }
    inputs[JSCallOrConstructNode::TargetIndex()] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}

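// Performs the actual inlining of {candidate}. A monomorphic site goes
// straight to JSInliner::ReduceJSCall(); a polymorphic site is first expanded
// into a dispatch over the known targets, after which each cloned call is
// inlined individually, as long as the budget permits.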
Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int const num_calls = candidate.num_functions;
  Node* const node = candidate.node;
#if V8_ENABLE_WEBASSEMBLY
  DCHECK_NE(node->opcode(), IrOpcode::kJSWasmCall);
#endif  // V8_ENABLE_WEBASSEMBLY
  if (num_calls == 1) {
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      total_inlined_bytecode_size_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Set up the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls && total_inlined_bytecode_size_ <
                                       max_inlined_bytecode_size_absolute_;
       ++i) {
    if (candidate.can_inline_function[i] &&
        (small_function || total_inlined_bytecode_size_ <
                               max_inlined_bytecode_size_cumulative_)) {
      Node* call = calls[i];
      Reduction const reduction = inliner_.ReduceJSCall(call);
      if (reduction.Changed()) {
        total_inlined_bytecode_size_ += candidate.bytecode[i]->length();
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        call->Kill();
      }
    }
  }

  return Replace(value);
}

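// Strict weak ordering for the candidate set: known frequencies sort before
// unknown ones and higher frequencies first, with the node id as a
// deterministic tie breaker, so candidates_.begin() is the hottest candidate.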
bool JSInliningHeuristic::CandidateCompare::operator()(
    const Candidate& left, const Candidate& right) const {
  if (right.frequency.IsUnknown()) {
    if (left.frequency.IsUnknown()) {
      // If left and right are both unknown then the ordering is indeterminate,
      // which breaks strict weak ordering requirements, so we fall back to the
      // node id as a tie breaker.
      return left.node->id() > right.node->id();
    }
    return true;
  } else if (left.frequency.IsUnknown()) {
    return false;
  } else if (left.frequency.value() > right.frequency.value()) {
    return true;
  } else if (left.frequency.value() < right.frequency.value()) {
    return false;
  } else {
    return left.node->id() > right.node->id();
  }
}

void JSInliningHeuristic::PrintCandidates() {
  StdoutStream os;
  os << candidates_.size() << " candidate(s) for inlining:" << std::endl;
  for (const Candidate& candidate : candidates_) {
    os << "- candidate: " << candidate.node->op()->mnemonic() << " node #"
       << candidate.node->id() << " with frequency " << candidate.frequency
       << ", " << candidate.num_functions << " target(s):" << std::endl;
    for (int i = 0; i < candidate.num_functions; ++i) {
      SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                         ? candidate.functions[i]->shared()
                                         : candidate.shared_info.value();
      os << "  - target: " << shared;
      if (candidate.bytecode[i].has_value()) {
        os << ", bytecode size: " << candidate.bytecode[i]->length();
        if (candidate.functions[i].has_value()) {
          JSFunctionRef function = candidate.functions[i].value();
          unsigned inlined_bytecode_size =
              function.code().GetInlinedBytecodeSize();
          if (inlined_bytecode_size > 0) {
            os << ", existing opt code's inlined bytecode size: "
               << inlined_bytecode_size;
          }
        }
      } else {
        os << ", no bytecode";
      }
      os << std::endl;
    }
  }
}

Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }

CompilationDependencies* JSInliningHeuristic::dependencies() const {
  return broker()->dependencies();
}

CommonOperatorBuilder* JSInliningHeuristic::common() const {
  return jsgraph()->common();
}

SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
  return jsgraph()->simplified();
}

#undef TRACE

}  // namespace compiler
}  // namespace internal
}  // namespace v8