// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/js-inlining-heuristic.h"

#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/js-heap-broker.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/simplified-operator.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {
namespace compiler {

#define TRACE(...)                                                             \
  do {                                                                         \
    if (FLAG_trace_turbo_inlining) StdoutStream{} << __VA_ARGS__ << std::endl; \
  } while (false)

namespace {
bool IsSmall(int const size) {
  return size <= FLAG_max_inlined_bytecode_size_small;
}

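// Checks whether {shared} (together with {feedback_vector}) satisfies the
// preconditions for inlining: the SharedFunctionInfo reports itself as
// inlineable and the broker has the data it needs serialized for this
// compilation.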
bool CanConsiderForInlining(JSHeapBroker* broker,
                            SharedFunctionInfoRef const& shared,
                            FeedbackVectorRef const& feedback_vector) {
  SharedFunctionInfo::Inlineability inlineability = shared.GetInlineability();
  if (inlineability != SharedFunctionInfo::kIsInlineable) {
    TRACE("Cannot consider "
          << shared << " for inlining (reason: " << inlineability << ")");
    return false;
  }

  DCHECK(shared.HasBytecodeArray());
  if (!broker->IsSerializedForCompilation(shared, feedback_vector)) {
    TRACE_BROKER_MISSING(
        broker, "data for " << shared << " (not serialized for compilation)");
    TRACE("Cannot consider " << shared << " for inlining with "
                             << feedback_vector << " (missing data)");
    return false;
  }

  TRACE("Considering " << shared << " for inlining with " << feedback_vector);
  return true;
}

bool CanConsiderForInlining(JSHeapBroker* broker,
                            JSFunctionRef const& function) {
  if (!function.has_feedback_vector()) {
    TRACE("Cannot consider " << function
                             << " for inlining (no feedback vector)");
    return false;
  }

  if (!function.serialized()) {
    TRACE_BROKER_MISSING(
        broker, "data for " << function << " (cannot consider for inlining)");
    TRACE("Cannot consider " << function << " for inlining (missing data)");
    return false;
  }
  return CanConsiderForInlining(broker, function.shared(),
                                function.feedback_vector());
}

}  // namespace

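// Collects the possible call targets of {node} into a Candidate by looking
// through its callee input: a constant JSFunction (monomorphic call), a Phi
// over constant JSFunctions (polymorphic call; illustratively, a source-level
// call such as `(cond ? f : g)(x)` where both closures are known constants),
// or a CheckClosure/JSCreateClosure node where only the SharedFunctionInfo is
// known. Bytecode is recorded per target only if CanConsiderForInlining
// succeeds for that target.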
JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
    Node* node, int functions_size) {
  DCHECK_NE(0, functions_size);
  Node* callee = node->InputAt(0);
  Candidate out;
  out.node = node;

  HeapObjectMatcher m(callee);
  if (m.HasResolvedValue() && m.Ref(broker()).IsJSFunction()) {
    out.functions[0] = m.Ref(broker()).AsJSFunction();
    JSFunctionRef function = out.functions[0].value();
    if (CanConsiderForInlining(broker(), function)) {
      out.bytecode[0] = function.shared().GetBytecodeArray();
      out.num_functions = 1;
      return out;
    }
  }
  if (m.IsPhi()) {
    int const value_input_count = m.node()->op()->ValueInputCount();
    if (value_input_count > functions_size) {
      out.num_functions = 0;
      return out;
    }
    for (int n = 0; n < value_input_count; ++n) {
      HeapObjectMatcher m(callee->InputAt(n));
      if (!m.HasResolvedValue() || !m.Ref(broker()).IsJSFunction()) {
        out.num_functions = 0;
        return out;
      }

      out.functions[n] = m.Ref(broker()).AsJSFunction();
      JSFunctionRef function = out.functions[n].value();
      if (CanConsiderForInlining(broker(), function)) {
        out.bytecode[n] = function.shared().GetBytecodeArray();
      }
    }
    out.num_functions = value_input_count;
    return out;
  }
  if (m.IsCheckClosure()) {
    DCHECK(!out.functions[0].has_value());
    FeedbackCellRef feedback_cell(broker(), FeedbackCellOf(m.op()));
    SharedFunctionInfoRef shared_info =
        feedback_cell.shared_function_info().value();
    out.shared_info = shared_info;
    if (feedback_cell.value().IsFeedbackVector() &&
        CanConsiderForInlining(broker(), shared_info,
                               feedback_cell.value().AsFeedbackVector())) {
      out.bytecode[0] = shared_info.GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  if (m.IsJSCreateClosure()) {
    DCHECK(!out.functions[0].has_value());
    JSCreateClosureNode n(callee);
    CreateClosureParameters const& p = n.Parameters();
    FeedbackCellRef feedback_cell = n.GetFeedbackCellRefChecked(broker());
    SharedFunctionInfoRef shared_info(broker(), p.shared_info());
    out.shared_info = shared_info;
    if (feedback_cell.value().IsFeedbackVector() &&
        CanConsiderForInlining(broker(), shared_info,
                               feedback_cell.value().AsFeedbackVector())) {
      out.bytecode[0] = shared_info.GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  out.num_functions = 0;
  return out;
}

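// Per-node entry point of the heuristic: ignores nodes that are not
// inlineable call opcodes, skips nodes that were already seen, collects the
// possible call targets, force-inlines call sites whose targets are all
// small, and defers the remaining candidates to Finalize().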
Reduction JSInliningHeuristic::Reduce(Node* node) {
  DisallowHeapAccessIf no_heap_access(broker()->is_concurrent_inlining());

  if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();

  if (total_inlined_bytecode_size_ >= FLAG_max_inlined_bytecode_size_absolute) {
    return NoChange();
  }

  // Check if we already saw that {node} before, and if so, just skip it.
  if (seen_.find(node->id()) != seen_.end()) return NoChange();
  seen_.insert(node->id());

  // Check if the {node} is an appropriate candidate for inlining.
  Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
  if (candidate.num_functions == 0) {
    return NoChange();
  } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) {
    TRACE("Not considering call site #"
          << node->id() << ":" << node->op()->mnemonic()
          << ", because polymorphic inlining is disabled");
    return NoChange();
  }

  bool can_inline_candidate = false, candidate_is_small = true;
  candidate.total_size = 0;
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  FrameStateInfo const& frame_info = FrameStateInfoOf(frame_state->op());
  Handle<SharedFunctionInfo> frame_shared_info;
  for (int i = 0; i < candidate.num_functions; ++i) {
    if (!candidate.bytecode[i].has_value()) {
      candidate.can_inline_function[i] = false;
      continue;
    }

    SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                       ? candidate.functions[i].value().shared()
                                       : candidate.shared_info.value();
    candidate.can_inline_function[i] = candidate.bytecode[i].has_value();
    CHECK_IMPLIES(candidate.can_inline_function[i], shared.IsInlineable());
    // Do not allow direct recursion, i.e. f() -> f(). We still allow indirect
    // recursion like f() -> g() -> f(). Indirect recursion is helpful in
    // cases where f() is a small dispatch function that calls the appropriate
    // function. In the case of direct recursion, we only have some static
    // information for the first level of inlining, and it may not be that
    // useful to inline just one level of a recursive call. In some cases like
    // tail recursion we may benefit from recursive inlining, if we have an
    // additional analysis that converts them to iterative implementations,
    // though it is not obvious whether such an analysis is needed.
    if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
        frame_shared_info.equals(shared.object())) {
      TRACE("Not considering call site #" << node->id() << ":"
                                          << node->op()->mnemonic()
                                          << ", because of recursive inlining");
      candidate.can_inline_function[i] = false;
    }
    if (candidate.can_inline_function[i]) {
      can_inline_candidate = true;
      BytecodeArrayRef bytecode = candidate.bytecode[i].value();
      candidate.total_size += bytecode.length();
      unsigned inlined_bytecode_size = 0;
      if (candidate.functions[i].has_value()) {
        JSFunctionRef function = candidate.functions[i].value();
        if (function.HasAttachedOptimizedCode()) {
          inlined_bytecode_size = function.code().inlined_bytecode_size();
          candidate.total_size += inlined_bytecode_size;
        }
      }
      candidate_is_small = candidate_is_small &&
                           IsSmall(bytecode.length() + inlined_bytecode_size);
    }
  }
  if (!can_inline_candidate) return NoChange();

  // Gather feedback on how often this call site has been hit before.
  if (node->opcode() == IrOpcode::kJSCall) {
    CallParameters const p = CallParametersOf(node->op());
    candidate.frequency = p.frequency();
  } else {
    ConstructParameters const p = ConstructParametersOf(node->op());
    candidate.frequency = p.frequency();
  }

  // Don't consider a {candidate} whose frequency is below the
  // threshold, i.e. a call site that is only hit once every N
  // invocations of the caller.
  if (candidate.frequency.IsKnown() &&
      candidate.frequency.value() < FLAG_min_inlining_frequency) {
    return NoChange();
  }

  // Forcibly inline small functions here. In the case of polymorphic inlining,
  // candidate_is_small is set only when all functions are small.
  if (candidate_is_small) {
    TRACE("Inlining small function(s) at call site #"
          << node->id() << ":" << node->op()->mnemonic());
    return InlineCandidate(candidate, true);
  }

  // In the general case we remember the candidate for later.
  candidates_.insert(candidate);
  return NoChange();
}

void JSInliningHeuristic::Finalize() {
  DisallowHeapAccessIf no_heap_access(broker()->is_concurrent_inlining());

  if (candidates_.empty()) return;  // Nothing to do without candidates.
  if (FLAG_trace_turbo_inlining) PrintCandidates();

  // We inline at most one candidate in every iteration of the fixpoint.
  // This is to ensure that we don't consume the full inlining budget
  // on things that aren't called very often.
  // TODO(bmeurer): Use std::priority_queue instead of std::set here.
  while (!candidates_.empty()) {
    auto i = candidates_.begin();
    Candidate candidate = *i;
    candidates_.erase(i);

    // Ignore this candidate if it's no longer valid.
    if (!IrOpcode::IsInlineeOpcode(candidate.node->opcode())) continue;
    if (candidate.node->IsDead()) continue;

    // Make sure we have some extra budget left, so that any small functions
    // exposed by this function would be given a chance to inline.
    double size_of_candidate =
        candidate.total_size * FLAG_reserve_inline_budget_scale_factor;
    int total_size =
        total_inlined_bytecode_size_ + static_cast<int>(size_of_candidate);
    if (total_size > FLAG_max_inlined_bytecode_size_cumulative) {
      // Check whether any smaller candidates are available to inline.
      continue;
    }

    Reduction const reduction = InlineCandidate(candidate, false);
    if (reduction.Changed()) return;
  }
}

namespace {

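// A (node, input index) pair identifying a single input slot, i.e. one use of
// a node.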
struct NodeAndIndex {
  Node* node;
  int index;
};

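// Records into {uses_buffer} every position at which {node} occurs in the
// StateValues tree rooted at {state_values}, but only within nodes that are
// not shared with other users. Returns false if more than {max_uses}
// occurrences would be recorded.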
bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (state_values->UseCount() > 1) return true;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    if (input->opcode() == IrOpcode::kStateValues) {
      if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
                                       max_uses)) {
        return false;
      }
    } else if (input == node) {
      if (*use_count >= max_uses) return false;
      uses_buffer[*use_count] = {state_values, i};
      (*use_count)++;
    }
  }
  return true;
}

}  // namespace

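// Returns a version of the StateValues tree rooted at {state_values} in which
// every occurrence of {from} is replaced by {to}. Unshared nodes are either
// updated in place (kChangeInPlace) or cloned lazily (kCloneState); shared
// nodes are returned unchanged.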
Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
                                                         Node* from, Node* to,
                                                         StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectStateValuesOwnedUses}.
  if (state_values->UseCount() > 1) return state_values;
  Node* copy = mode == kChangeInPlace ? state_values : nullptr;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    Node* processed;
    if (input->opcode() == IrOpcode::kStateValues) {
      processed = DuplicateStateValuesAndRename(input, from, to, mode);
    } else if (input == from) {
      processed = to;
    } else {
      processed = input;
    }
    if (processed != input) {
      if (!copy) {
        copy = graph()->CloneNode(state_values);
      }
      copy->ReplaceInput(i, processed);
    }
  }
  return copy ? copy : state_values;
}

namespace {

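// Like CollectStateValuesOwnedUses, but for a FrameState: records uses of
// {node} in the stack input and in the locals StateValues of an unshared
// {frame_state}.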
bool CollectFrameStateUniqueUses(Node* node, Node* frame_state,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (frame_state->UseCount() > 1) return true;
  if (frame_state->InputAt(kFrameStateStackInput) == node) {
    if (*use_count >= max_uses) return false;
    uses_buffer[*use_count] = {frame_state, kFrameStateStackInput};
    (*use_count)++;
  }
  if (!CollectStateValuesOwnedUses(node,
                                   frame_state->InputAt(kFrameStateLocalsInput),
                                   uses_buffer, use_count, max_uses)) {
    return false;
  }
  return true;
}

}  // namespace

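// FrameState counterpart of DuplicateStateValuesAndRename: replaces {from}
// with {to} in the stack input and in the locals of an unshared
// {frame_state}, cloning or updating in place according to {mode}.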
Node* JSInliningHeuristic::DuplicateFrameStateAndRename(Node* frame_state,
                                                        Node* from, Node* to,
                                                        StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy = mode == kChangeInPlace ? frame_state : nullptr;
  if (frame_state->InputAt(kFrameStateStackInput) == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateStackInput, to);
  }
  Node* locals = frame_state->InputAt(kFrameStateLocalsInput);
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateLocalsInput, new_locals);
  }
  return copy ? copy : frame_state;
}

bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side-effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  // We are trying to match the following pattern:
  //
  //         C1     C2
  //          .     .
  //          |     |
  //         Merge(merge)  <-----------------+
  //           ^    ^                        |
  //  V1  V2   |    |         E1  E2         |
  //   .  .    |    +----+     .  .          |
  //   |  |    |         |     |  |          |
  //  Phi(callee)      EffectPhi(effect_phi) |
  //     ^                    ^              |
  //     |                    |              |
  //     +----+               |              |
  //     |    |               |              |
  //     |   StateValues      |              |
  //     |       ^            |              |
  //     +----+  |            |              |
  //     |    |  |            |              |
  //     |    FrameState      |              |
  //     |           ^        |              |
  //     |           |        |          +---+
  //     |           |        |          |   |
  //     +----+     Checkpoint(checkpoint)   |
  //     |    |           ^                  |
  //     |    StateValues |    +-------------+
  //     |        |       |    |
  //     +-----+  |       |    |
  //     |     |  |       |    |
  //     |     FrameState |    |
  //     |             ^  |    |
  //     +-----------+ |  |    |
  //                  Call(node)
  //                   |
  //                   |
  //                   .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  //     V1              E1   C1  V2   E2               C2
  //     .                .    .  .    .                .
  //     |                |    |  |    |                |
  //     +----+           |    |  +----+                |
  //     |    |           |    |  |    |                |
  //     |   StateValues  |    |  |   StateValues       |
  //     |       ^        |    |  |       ^             |
  //     +----+  |        |    |  +----+  |             |
  //     |    |  |        |    |  |    |  |             |
  //     |    FrameState  |    |  |    FrameState       |
  //     |           ^    |    |  |           ^         |
  //     |           |    |    |  |           |         |
  //     |           |    |    |  |           |         |
  //     +----+     Checkpoint |  +----+     Checkpoint |
  //     |    |           ^    |  |    |           ^    |
  //     |    StateValues |    |  |    StateValues |    |
  //     |        |       |    |  |        |       |    |
  //     +-----+  |       |    |  +-----+  |       |    |
  //     |     |  |       |    |  |     |  |       |    |
  //     |     FrameState |    |  |     FrameState |    |
  //     |              ^ |    |  |              ^ |    |
  //     +-------------+| |    |  +-------------+| |    |
  //                   Call----+                Call----+
  //                     |                       |
  //                     +-------+  +------------+
  //                             |  |
  //                             Merge
  //                             EffectPhi
  //                             Phi
  //                              |
  //                             ...

  // Bailout if the call is not polymorphic anymore (other reducers might
  // have replaced the callee phi with a constant).
  if (callee->opcode() != IrOpcode::kPhi) return false;
  int const num_calls = callee->op()->ValueInputCount();

  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must be only used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With CFG, this would be much easier and more robust - we would just
  // duplicate all the nodes between the merge and the call, replacing all
  // occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, checkpoint_state, replaceable_uses,
                                     &replaceable_uses_count, kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Case 2 and 3 - used in checkpoint and/or lazy deopt frame states.
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  // Clone the call and the framestate, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint.
      Node* new_checkpoint_state = DuplicateFrameStateAndRename(
          checkpoint_state, callee, target,
          (i == num_calls - 1) ? kChangeInPlace : kCloneState);
      effect = graph()->NewNode(checkpoint->op(), new_checkpoint_state, effect,
                                control);
    }

    // Duplicate the call.
    Node* new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}

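// Builds the control flow that dispatches to the cloned calls: first tries to
// reuse the branch that already computed the callee (TryReuseDispatch), and
// otherwise emits an explicit chain of ReferenceEqual checks against each
// known target.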
void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
                                                Candidate const& candidate,
                                                Node** if_successes,
                                                Node** calls, Node** inputs,
                                                int input_count) {
  SourcePositionTable::Scope position(
      source_positions_, source_positions_->GetSourcePosition(node));
  if (TryReuseDispatch(node, callee, if_successes, calls, inputs,
                       input_count)) {
    return;
  }

  STATIC_ASSERT(JSCallOrConstructNode::kHaveIdenticalLayouts);

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  int const num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
  for (int i = 0; i < num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target = jsgraph()->Constant(candidate.functions[i].value());
    if (i != (num_calls - 1)) {
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct) {
      // TODO(jgruber, v8:10675): This branch seems unreachable.
      JSConstructNode n(node);
      if (inputs[n.TargetIndex()] == inputs[n.NewTargetIndex()]) {
        inputs[n.NewTargetIndex()] = target;
      }
    }
    inputs[JSCallOrConstructNode::TargetIndex()] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}

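// Performs the actual inlining for a candidate. A monomorphic candidate is
// handed straight to the inliner; a polymorphic one is first expanded into
// per-target call clones whose results are joined by Merge/EffectPhi/Phi
// nodes, after which each clone is inlined as long as the bytecode budget
// permits.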
Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int const num_calls = candidate.num_functions;
  Node* const node = candidate.node;
  if (num_calls == 1) {
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      total_inlined_bytecode_size_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Setup the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls && total_inlined_bytecode_size_ <
                                       FLAG_max_inlined_bytecode_size_absolute;
       ++i) {
    if (candidate.can_inline_function[i] &&
        (small_function || total_inlined_bytecode_size_ <
                               FLAG_max_inlined_bytecode_size_cumulative)) {
      Node* node = calls[i];
      Reduction const reduction = inliner_.ReduceJSCall(node);
      if (reduction.Changed()) {
        total_inlined_bytecode_size_ += candidate.bytecode[i]->length();
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        node->Kill();
      }
    }
  }

  return Replace(value);
}

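// Orders candidates by call frequency, highest first, so that Finalize()
// always picks the hottest remaining candidate. Candidates with unknown
// frequency sort last, and node ids break ties to keep this a strict weak
// ordering.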
bool JSInliningHeuristic::CandidateCompare::operator()(
    const Candidate& left, const Candidate& right) const {
  if (right.frequency.IsUnknown()) {
    if (left.frequency.IsUnknown()) {
      // If left and right are both unknown then the ordering is indeterminate,
      // which breaks strict weak ordering requirements, so we fall back to the
      // node id as a tie breaker.
      return left.node->id() > right.node->id();
    }
    return true;
  } else if (left.frequency.IsUnknown()) {
    return false;
  } else if (left.frequency.value() > right.frequency.value()) {
    return true;
  } else if (left.frequency.value() < right.frequency.value()) {
    return false;
  } else {
    return left.node->id() > right.node->id();
  }
}

void JSInliningHeuristic::PrintCandidates() {
  StdoutStream os;
  os << candidates_.size() << " candidate(s) for inlining:" << std::endl;
  for (const Candidate& candidate : candidates_) {
    os << "- candidate: " << candidate.node->op()->mnemonic() << " node #"
       << candidate.node->id() << " with frequency " << candidate.frequency
       << ", " << candidate.num_functions << " target(s):" << std::endl;
    for (int i = 0; i < candidate.num_functions; ++i) {
      SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                         ? candidate.functions[i]->shared()
                                         : candidate.shared_info.value();
      os << "  - target: " << shared;
      if (candidate.bytecode[i].has_value()) {
        os << ", bytecode size: " << candidate.bytecode[i]->length();
        if (candidate.functions[i].has_value()) {
          JSFunctionRef function = candidate.functions[i].value();
          if (function.HasAttachedOptimizedCode()) {
            os << ", existing opt code's inlined bytecode size: "
               << function.code().inlined_bytecode_size();
          }
        }
      } else {
        os << ", no bytecode";
      }
      os << std::endl;
    }
  }
}

Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }

CommonOperatorBuilder* JSInliningHeuristic::common() const {
  return jsgraph()->common();
}

SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
  return jsgraph()->simplified();
}

#undef TRACE

}  // namespace compiler
}  // namespace internal
}  // namespace v8