1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/compiler/js-inlining-heuristic.h"
6
7 #include "src/codegen/optimized-compilation-info.h"
8 #include "src/compiler/common-operator.h"
9 #include "src/compiler/compiler-source-position-table.h"
10 #include "src/compiler/js-heap-broker.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/simplified-operator.h"
13 #include "src/objects/objects-inl.h"
14
15 namespace v8 {
16 namespace internal {
17 namespace compiler {
18
19 #define TRACE(...) \
20 do { \
21 if (FLAG_trace_turbo_inlining) StdoutStream{} << __VA_ARGS__ << std::endl; \
22 } while (false)
23
24 namespace {
IsSmall(int const size)25 bool IsSmall(int const size) {
26 return size <= FLAG_max_inlined_bytecode_size_small;
27 }
28
CanConsiderForInlining(JSHeapBroker * broker,FeedbackCellRef const & feedback_cell)29 bool CanConsiderForInlining(JSHeapBroker* broker,
30 FeedbackCellRef const& feedback_cell) {
31 base::Optional<FeedbackVectorRef> feedback_vector =
32 feedback_cell.feedback_vector();
33 if (!feedback_vector.has_value()) {
34 TRACE("Cannot consider " << feedback_cell
35 << " for inlining (no feedback vector)");
36 return false;
37 }
38 SharedFunctionInfoRef shared = feedback_vector->shared_function_info();
39
40 if (!shared.HasBytecodeArray()) {
41 TRACE("Cannot consider " << shared << " for inlining (no bytecode)");
42 return false;
43 }
44 // Ensure we have a persistent handle to the bytecode in order to avoid
45 // flushing it during the remaining compilation.
46 shared.GetBytecodeArray();
47
48 // Read feedback vector again in case it got flushed before we were able to
49 // prevent flushing above.
50 base::Optional<FeedbackVectorRef> feedback_vector_again =
51 feedback_cell.feedback_vector();
52 if (!feedback_vector_again.has_value()) {
53 TRACE("Cannot consider " << shared << " for inlining (no feedback vector)");
54 return false;
55 }
56 if (!feedback_vector_again->equals(*feedback_vector)) {
57 // The new feedback vector likely contains lots of uninitialized slots, so
58 // it doesn't make much sense to inline this function now.
59 TRACE("Not considering " << shared
60 << " for inlining (feedback vector changed)");
61 return false;
62 }
63
64 SharedFunctionInfo::Inlineability inlineability = shared.GetInlineability();
65 if (inlineability != SharedFunctionInfo::kIsInlineable) {
66 TRACE("Cannot consider "
67 << shared << " for inlining (reason: " << inlineability << ")");
68 return false;
69 }
70
71 TRACE("Considering " << shared << " for inlining with " << *feedback_vector);
72 return true;
73 }
74
CanConsiderForInlining(JSHeapBroker * broker,JSFunctionRef const & function)75 bool CanConsiderForInlining(JSHeapBroker* broker,
76 JSFunctionRef const& function) {
77 FeedbackCellRef feedback_cell =
78 function.raw_feedback_cell(broker->dependencies());
79 bool const result = CanConsiderForInlining(broker, feedback_cell);
80 if (result) {
81 CHECK(
82 function.shared().equals(feedback_cell.shared_function_info().value()));
83 }
84 return result;
85 }
86
87 } // namespace
88
CollectFunctions(Node * node,int functions_size)89 JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
90 Node* node, int functions_size) {
91 DCHECK_NE(0, functions_size);
92 Node* callee = node->InputAt(0);
93 Candidate out;
94 out.node = node;
95
96 HeapObjectMatcher m(callee);
97 if (m.HasResolvedValue() && m.Ref(broker()).IsJSFunction()) {
98 JSFunctionRef function = m.Ref(broker()).AsJSFunction();
99 out.functions[0] = function;
100 if (CanConsiderForInlining(broker(), function)) {
101 out.bytecode[0] = function.shared().GetBytecodeArray();
102 out.num_functions = 1;
103 return out;
104 }
105 }
106 if (m.IsPhi()) {
107 int const value_input_count = m.node()->op()->ValueInputCount();
108 if (value_input_count > functions_size) {
109 out.num_functions = 0;
110 return out;
111 }
112 for (int n = 0; n < value_input_count; ++n) {
113 HeapObjectMatcher m2(callee->InputAt(n));
114 if (!m2.HasResolvedValue() || !m2.Ref(broker()).IsJSFunction()) {
115 out.num_functions = 0;
116 return out;
117 }
118
119 out.functions[n] = m2.Ref(broker()).AsJSFunction();
120 JSFunctionRef function = out.functions[n].value();
121 if (CanConsiderForInlining(broker(), function)) {
122 out.bytecode[n] = function.shared().GetBytecodeArray();
123 }
124 }
125 out.num_functions = value_input_count;
126 return out;
127 }
128 if (m.IsCheckClosure()) {
129 DCHECK(!out.functions[0].has_value());
130 FeedbackCellRef feedback_cell = MakeRef(broker(), FeedbackCellOf(m.op()));
131 if (CanConsiderForInlining(broker(), feedback_cell)) {
132 out.shared_info = feedback_cell.shared_function_info().value();
133 out.bytecode[0] = out.shared_info->GetBytecodeArray();
134 }
135 out.num_functions = 1;
136 return out;
137 }
138 if (m.IsJSCreateClosure()) {
139 DCHECK(!out.functions[0].has_value());
140 JSCreateClosureNode n(callee);
141 FeedbackCellRef feedback_cell = n.GetFeedbackCellRefChecked(broker());
142 if (CanConsiderForInlining(broker(), feedback_cell)) {
143 out.shared_info = feedback_cell.shared_function_info().value();
144 out.bytecode[0] = out.shared_info->GetBytecodeArray();
145 CHECK(out.shared_info->equals(n.Parameters().shared_info(broker())));
146 }
147 out.num_functions = 1;
148 return out;
149 }
150 out.num_functions = 0;
151 return out;
152 }
153
Reduce(Node * node)154 Reduction JSInliningHeuristic::Reduce(Node* node) {
155 #if V8_ENABLE_WEBASSEMBLY
156 if (mode() == kWasmOnly) {
157 if (node->opcode() == IrOpcode::kJSWasmCall) {
158 return inliner_.ReduceJSWasmCall(node);
159 }
160 return NoChange();
161 }
162 #endif // V8_ENABLE_WEBASSEMBLY
163
164 DCHECK_EQ(mode(), kJSOnly);
165 if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();
166
167 if (total_inlined_bytecode_size_ >= max_inlined_bytecode_size_absolute_) {
168 return NoChange();
169 }
170
171 // Check if we already saw that {node} before, and if so, just skip it.
172 if (seen_.find(node->id()) != seen_.end()) return NoChange();
173
174 // Check if the {node} is an appropriate candidate for inlining.
175 Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
176 if (candidate.num_functions == 0) {
177 return NoChange();
178 } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) {
179 TRACE("Not considering call site #"
180 << node->id() << ":" << node->op()->mnemonic()
181 << ", because polymorphic inlining is disabled");
182 return NoChange();
183 }
184
185 bool can_inline_candidate = false, candidate_is_small = true;
186 candidate.total_size = 0;
187 FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
188 FrameStateInfo const& frame_info = frame_state.frame_state_info();
189 Handle<SharedFunctionInfo> frame_shared_info;
190 for (int i = 0; i < candidate.num_functions; ++i) {
191 if (!candidate.bytecode[i].has_value()) {
192 candidate.can_inline_function[i] = false;
193 continue;
194 }
195
196 SharedFunctionInfoRef shared = candidate.functions[i].has_value()
197 ? candidate.functions[i].value().shared()
198 : candidate.shared_info.value();
199 candidate.can_inline_function[i] = candidate.bytecode[i].has_value();
200 // Because of concurrent optimization, optimization of the inlining
201 // candidate could have been disabled meanwhile.
202 // JSInliner will check this again and not actually inline the function in
203 // this case.
204 CHECK_IMPLIES(candidate.can_inline_function[i],
205 shared.IsInlineable() ||
206 shared.GetInlineability() ==
207 SharedFunctionInfo::kHasOptimizationDisabled);
208 // Do not allow direct recursion i.e. f() -> f(). We still allow indirect
209 // recursion like f() -> g() -> f(). The indirect recursion is helpful in
210 // cases where f() is a small dispatch function that calls the appropriate
211 // function. In the case of direct recursion, we only have some static
212 // information for the first level of inlining and it may not be that useful
213 // to just inline one level in recursive calls. In some cases like tail
214 // recursion we may benefit from recursive inlining, if we have additional
215 // analysis that converts them to iterative implementations. Though it is
216 // not obvious if such an analysis is needed.
217 if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
218 frame_shared_info.equals(shared.object())) {
219 TRACE("Not considering call site #" << node->id() << ":"
220 << node->op()->mnemonic()
221 << ", because of recursive inlining");
222 candidate.can_inline_function[i] = false;
223 }
224 if (candidate.can_inline_function[i]) {
225 can_inline_candidate = true;
226 BytecodeArrayRef bytecode = candidate.bytecode[i].value();
227 candidate.total_size += bytecode.length();
228 unsigned inlined_bytecode_size = 0;
229 if (candidate.functions[i].has_value()) {
230 JSFunctionRef function = candidate.functions[i].value();
231 inlined_bytecode_size = function.code().GetInlinedBytecodeSize();
232 candidate.total_size += inlined_bytecode_size;
233 }
234 candidate_is_small = candidate_is_small &&
235 IsSmall(bytecode.length() + inlined_bytecode_size);
236 }
237 }
238 if (!can_inline_candidate) return NoChange();
239
240 // Gather feedback on how often this call site has been hit before.
241 if (node->opcode() == IrOpcode::kJSCall) {
242 CallParameters const p = CallParametersOf(node->op());
243 candidate.frequency = p.frequency();
244 } else {
245 ConstructParameters const p = ConstructParametersOf(node->op());
246 candidate.frequency = p.frequency();
247 }
248
249 // Don't consider a {candidate} whose frequency is below the
250 // threshold, i.e. a call site that is only hit once every N
251 // invocations of the caller.
252 if (candidate.frequency.IsKnown() &&
253 candidate.frequency.value() < FLAG_min_inlining_frequency) {
254 return NoChange();
255 }
256
257 // Found a candidate. Insert it into the set of seen nodes s.t. we don't
258 // revisit in the future. Note this insertion happens here and not earlier in
259 // order to make inlining decisions order-independent. A node may not be a
260 // candidate when first seen, but later reductions may turn it into a valid
261 // candidate. In that case, the node should be revisited by
262 // JSInliningHeuristic.
263 seen_.insert(node->id());
264
265 // Forcibly inline small functions here. In the case of polymorphic inlining
266 // candidate_is_small is set only when all functions are small.
267 if (candidate_is_small) {
268 TRACE("Inlining small function(s) at call site #"
269 << node->id() << ":" << node->op()->mnemonic());
270 return InlineCandidate(candidate, true);
271 }
272
273 // In the general case we remember the candidate for later.
274 candidates_.insert(candidate);
275 return NoChange();
276 }
277
Finalize()278 void JSInliningHeuristic::Finalize() {
279 if (candidates_.empty()) return; // Nothing to do without candidates.
280 if (FLAG_trace_turbo_inlining) PrintCandidates();
281
282 // We inline at most one candidate in every iteration of the fixpoint.
283 // This is to ensure that we don't consume the full inlining budget
284 // on things that aren't called very often.
285 // TODO(bmeurer): Use std::priority_queue instead of std::set here.
286 while (!candidates_.empty()) {
287 auto i = candidates_.begin();
288 Candidate candidate = *i;
289 candidates_.erase(i);
290
291 // Ignore this candidate if it's no longer valid.
292 if (!IrOpcode::IsInlineeOpcode(candidate.node->opcode())) continue;
293 if (candidate.node->IsDead()) continue;
294
295 // Make sure we have some extra budget left, so that any small functions
296 // exposed by this function would be given a chance to inline.
297 double size_of_candidate =
298 candidate.total_size * FLAG_reserve_inline_budget_scale_factor;
299 int total_size =
300 total_inlined_bytecode_size_ + static_cast<int>(size_of_candidate);
301 if (total_size > max_inlined_bytecode_size_cumulative_) {
302 // Try if any smaller functions are available to inline.
303 continue;
304 }
305
306 Reduction const reduction = InlineCandidate(candidate, false);
307 if (reduction.Changed()) return;
308 }
309 }
310
311 namespace {
312
313 struct NodeAndIndex {
314 Node* node;
315 int index;
316 };
317
CollectStateValuesOwnedUses(Node * node,Node * state_values,NodeAndIndex * uses_buffer,size_t * use_count,size_t max_uses)318 bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
319 NodeAndIndex* uses_buffer, size_t* use_count,
320 size_t max_uses) {
321 // Only accumulate states that are not shared with other users.
322 if (state_values->UseCount() > 1) return true;
323 for (int i = 0; i < state_values->InputCount(); i++) {
324 Node* input = state_values->InputAt(i);
325 if (input->opcode() == IrOpcode::kStateValues) {
326 if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
327 max_uses)) {
328 return false;
329 }
330 } else if (input == node) {
331 if (*use_count >= max_uses) return false;
332 uses_buffer[*use_count] = {state_values, i};
333 (*use_count)++;
334 }
335 }
336 return true;
337 }
338
339 } // namespace
340
DuplicateStateValuesAndRename(Node * state_values,Node * from,Node * to,StateCloneMode mode)341 Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
342 Node* from, Node* to,
343 StateCloneMode mode) {
344 // Only rename in states that are not shared with other users. This needs to
345 // be in sync with the condition in {CollectStateValuesOwnedUses}.
346 if (state_values->UseCount() > 1) return state_values;
347 Node* copy = mode == kChangeInPlace ? state_values : nullptr;
348 for (int i = 0; i < state_values->InputCount(); i++) {
349 Node* input = state_values->InputAt(i);
350 Node* processed;
351 if (input->opcode() == IrOpcode::kStateValues) {
352 processed = DuplicateStateValuesAndRename(input, from, to, mode);
353 } else if (input == from) {
354 processed = to;
355 } else {
356 processed = input;
357 }
358 if (processed != input) {
359 if (!copy) {
360 copy = graph()->CloneNode(state_values);
361 }
362 copy->ReplaceInput(i, processed);
363 }
364 }
365 return copy ? copy : state_values;
366 }
367
368 namespace {
369
CollectFrameStateUniqueUses(Node * node,FrameState frame_state,NodeAndIndex * uses_buffer,size_t * use_count,size_t max_uses)370 bool CollectFrameStateUniqueUses(Node* node, FrameState frame_state,
371 NodeAndIndex* uses_buffer, size_t* use_count,
372 size_t max_uses) {
373 // Only accumulate states that are not shared with other users.
374 if (frame_state->UseCount() > 1) return true;
375 if (frame_state.stack() == node) {
376 if (*use_count >= max_uses) return false;
377 uses_buffer[*use_count] = {frame_state, FrameState::kFrameStateStackInput};
378 (*use_count)++;
379 }
380 if (!CollectStateValuesOwnedUses(node, frame_state.locals(), uses_buffer,
381 use_count, max_uses)) {
382 return false;
383 }
384 return true;
385 }
386
387 } // namespace
388
DuplicateFrameStateAndRename(FrameState frame_state,Node * from,Node * to,StateCloneMode mode)389 FrameState JSInliningHeuristic::DuplicateFrameStateAndRename(
390 FrameState frame_state, Node* from, Node* to, StateCloneMode mode) {
391 // Only rename in states that are not shared with other users. This needs to
392 // be in sync with the condition in {DuplicateFrameStateAndRename}.
393 if (frame_state->UseCount() > 1) return frame_state;
394 Node* copy =
395 mode == kChangeInPlace ? static_cast<Node*>(frame_state) : nullptr;
396 if (frame_state.stack() == from) {
397 if (!copy) {
398 copy = graph()->CloneNode(frame_state);
399 }
400 copy->ReplaceInput(FrameState::kFrameStateStackInput, to);
401 }
402 Node* locals = frame_state.locals();
403 Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
404 if (new_locals != locals) {
405 if (!copy) {
406 copy = graph()->CloneNode(frame_state);
407 }
408 copy->ReplaceInput(FrameState::kFrameStateLocalsInput, new_locals);
409 }
410 return copy != nullptr ? FrameState{copy} : frame_state;
411 }
412
TryReuseDispatch(Node * node,Node * callee,Node ** if_successes,Node ** calls,Node ** inputs,int input_count)413 bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
414 Node** if_successes, Node** calls,
415 Node** inputs, int input_count) {
416 // We will try to reuse the control flow branch created for computing
417 // the {callee} target of the call. We only reuse the branch if there
418 // is no side-effect between the call and the branch, and if the callee is
419 // only used as the target (and possibly also in the related frame states).
420
421 // We are trying to match the following pattern:
422 //
423 // C1 C2
424 // . .
425 // | |
426 // Merge(merge) <-----------------+
427 // ^ ^ |
428 // V1 V2 | | E1 E2 |
429 // . . | +----+ . . |
430 // | | | | | | |
431 // Phi(callee) EffectPhi(effect_phi) |
432 // ^ ^ |
433 // | | |
434 // +----+ | |
435 // | | | |
436 // | StateValues | |
437 // | ^ | |
438 // +----+ | | |
439 // | | | | |
440 // | FrameState | |
441 // | ^ | |
442 // | | | +---+
443 // | | | | |
444 // +----+ Checkpoint(checkpoint) |
445 // | | ^ |
446 // | StateValues | +-------------+
447 // | | | |
448 // +-----+ | | |
449 // | | | | |
450 // | FrameState | |
451 // | ^ | |
452 // +-----------+ | | |
453 // Call(node)
454 // |
455 // |
456 // .
457 //
458 // The {callee} here is a phi that merges the possible call targets, {node}
459 // is the actual call that we will try to duplicate and connect to the
460 // control that comes into {merge}. There can be a {checkpoint} between
461 // the call and the calle phi.
462 //
463 // The idea is to get rid of the merge, effect phi and phi, then duplicate
464 // the call (with all the frame states and such), and connect the duplicated
465 // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
466 // ex-merge. The tricky part is to make sure that there is no interference
467 // from the outside. In particular, there should not be any unaccounted uses
468 // of the phi, effect-phi and merge because we will remove them from
469 // the graph.
470 //
471 // V1 E1 C1 V2 E2 C2
472 // . . . . . .
473 // | | | | | |
474 // +----+ | | +----+ |
475 // | | | | | | |
476 // | StateValues | | | StateValues |
477 // | ^ | | | ^ |
478 // +----+ | | | +----+ | |
479 // | | | | | | | | |
480 // | FrameState | | | FrameState |
481 // | ^ | | | ^ |
482 // | | | | | | |
483 // | | | | | | |
484 // +----+ Checkpoint | +----+ Checkpoint |
485 // | | ^ | | | ^ |
486 // | StateValues | | | StateValues | |
487 // | | | | | | | |
488 // +-----+ | | | +-----+ | | |
489 // | | | | | | | | | |
490 // | FrameState | | | FrameState | |
491 // | ^ | | | ^ | |
492 // +-------------+| | | +-------------+| | |
493 // Call----+ Call----+
494 // | |
495 // +-------+ +------------+
496 // | |
497 // Merge
498 // EffectPhi
499 // Phi
500 // |
501 // ...
502
503 // Bailout if the call is not polymorphic anymore (other reducers might
504 // have replaced the callee phi with a constant).
505 if (callee->opcode() != IrOpcode::kPhi) return false;
506 int const num_calls = callee->op()->ValueInputCount();
507
508 // If there is a control node between the callee computation
509 // and the call, bail out.
510 Node* merge = NodeProperties::GetControlInput(callee);
511 if (NodeProperties::GetControlInput(node) != merge) return false;
512
513 // If there is a non-checkpoint effect node between the callee computation
514 // and the call, bail out. We will drop any checkpoint between the call and
515 // the callee phi because the callee computation should have its own
516 // checkpoint that the call can fall back to.
517 Node* checkpoint = nullptr;
518 Node* effect = NodeProperties::GetEffectInput(node);
519 if (effect->opcode() == IrOpcode::kCheckpoint) {
520 checkpoint = effect;
521 if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
522 effect = NodeProperties::GetEffectInput(effect);
523 }
524 if (effect->opcode() != IrOpcode::kEffectPhi) return false;
525 if (NodeProperties::GetControlInput(effect) != merge) return false;
526 Node* effect_phi = effect;
527
528 // The effect phi, the callee, the call and the checkpoint must be the only
529 // users of the merge.
530 for (Node* merge_use : merge->uses()) {
531 if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
532 merge_use != checkpoint) {
533 return false;
534 }
535 }
536
537 // The effect phi must be only used by the checkpoint or the call.
538 for (Node* effect_phi_use : effect_phi->uses()) {
539 if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
540 }
541
542 // We must replace the callee phi with the appropriate constant in
543 // the entire subgraph reachable by inputs from the call (terminating
544 // at phis and merges). Since we do not want to walk (and later duplicate)
545 // the subgraph here, we limit the possible uses to this set:
546 //
547 // 1. In the call (as a target).
548 // 2. The checkpoint between the call and the callee computation merge.
549 // 3. The lazy deoptimization frame state.
550 //
551 // This corresponds to the most common pattern, where the function is
552 // called with only local variables or constants as arguments.
553 //
554 // To check the uses, we first collect all the occurrences of callee in 1, 2
555 // and 3, and then we check that all uses of callee are in the collected
556 // occurrences. If there is an unaccounted use, we do not try to rewire
557 // the control flow.
558 //
559 // Note: With CFG, this would be much easier and more robust - we would just
560 // duplicate all the nodes between the merge and the call, replacing all
561 // occurrences of the {callee} phi with the appropriate constant.
562
563 // First compute the set of uses that are only reachable from 2 and 3.
564 const size_t kMaxUses = 8;
565 NodeAndIndex replaceable_uses[kMaxUses];
566 size_t replaceable_uses_count = 0;
567
568 // Collect the uses to check case 2.
569 Node* checkpoint_state = nullptr;
570 if (checkpoint) {
571 checkpoint_state = checkpoint->InputAt(0);
572 if (!CollectFrameStateUniqueUses(callee, FrameState{checkpoint_state},
573 replaceable_uses, &replaceable_uses_count,
574 kMaxUses)) {
575 return false;
576 }
577 }
578
579 // Collect the uses to check case 3.
580 FrameState frame_state{NodeProperties::GetFrameStateInput(node)};
581 if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
582 &replaceable_uses_count, kMaxUses)) {
583 return false;
584 }
585
586 // Bail out if there is a use of {callee} that is not reachable from 1, 2
587 // and 3.
588 for (Edge edge : callee->use_edges()) {
589 // Case 1 (use by the call as a target).
590 if (edge.from() == node && edge.index() == 0) continue;
591 // Case 2 and 3 - used in checkpoint and/or lazy deopt frame states.
592 bool found = false;
593 for (size_t i = 0; i < replaceable_uses_count; i++) {
594 if (replaceable_uses[i].node == edge.from() &&
595 replaceable_uses[i].index == edge.index()) {
596 found = true;
597 break;
598 }
599 }
600 if (!found) return false;
601 }
602
603 // Clone the call and the framestate, including the uniquely reachable
604 // state values, making sure that we replace the phi with the constant.
605 for (int i = 0; i < num_calls; ++i) {
606 // Clone the calls for each branch.
607 // We need to specialize the calls to the correct target, effect, and
608 // control. We also need to duplicate the checkpoint and the lazy
609 // frame state, and change all the uses of the callee to the constant
610 // callee.
611 Node* target = callee->InputAt(i);
612 Node* effect_phi_effect = effect_phi->InputAt(i);
613 Node* control = merge->InputAt(i);
614
615 if (checkpoint) {
616 // Duplicate the checkpoint.
617 FrameState new_checkpoint_state = DuplicateFrameStateAndRename(
618 FrameState{checkpoint_state}, callee, target,
619 (i == num_calls - 1) ? kChangeInPlace : kCloneState);
620 effect_phi_effect = graph()->NewNode(
621 checkpoint->op(), new_checkpoint_state, effect_phi_effect, control);
622 }
623
624 // Duplicate the call.
625 FrameState new_lazy_frame_state = DuplicateFrameStateAndRename(
626 frame_state, callee, target,
627 (i == num_calls - 1) ? kChangeInPlace : kCloneState);
628 inputs[0] = target;
629 inputs[input_count - 3] = new_lazy_frame_state;
630 inputs[input_count - 2] = effect_phi_effect;
631 inputs[input_count - 1] = control;
632 calls[i] = if_successes[i] =
633 graph()->NewNode(node->op(), input_count, inputs);
634 }
635
636 // Mark the control inputs dead, so that we can kill the merge.
637 node->ReplaceInput(input_count - 1, jsgraph()->Dead());
638 callee->ReplaceInput(num_calls, jsgraph()->Dead());
639 effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
640 if (checkpoint) {
641 checkpoint->ReplaceInput(2, jsgraph()->Dead());
642 }
643
644 merge->Kill();
645 return true;
646 }
647
CreateOrReuseDispatch(Node * node,Node * callee,Candidate const & candidate,Node ** if_successes,Node ** calls,Node ** inputs,int input_count)648 void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
649 Candidate const& candidate,
650 Node** if_successes,
651 Node** calls, Node** inputs,
652 int input_count) {
653 SourcePositionTable::Scope position(
654 source_positions_, source_positions_->GetSourcePosition(node));
655 if (TryReuseDispatch(node, callee, if_successes, calls, inputs,
656 input_count)) {
657 return;
658 }
659
660 STATIC_ASSERT(JSCallOrConstructNode::kHaveIdenticalLayouts);
661
662 Node* fallthrough_control = NodeProperties::GetControlInput(node);
663 int const num_calls = candidate.num_functions;
664
665 // Create the appropriate control flow to dispatch to the cloned calls.
666 for (int i = 0; i < num_calls; ++i) {
667 // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
668 // instead of the target JSFunction reference directly.
669 Node* target = jsgraph()->Constant(candidate.functions[i].value());
670 if (i != (num_calls - 1)) {
671 Node* check =
672 graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
673 Node* branch =
674 graph()->NewNode(common()->Branch(), check, fallthrough_control);
675 fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
676 if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
677 } else {
678 if_successes[i] = fallthrough_control;
679 }
680
681 // Clone the calls for each branch.
682 // The first input to the call is the actual target (which we specialize
683 // to the known {target}); the last input is the control dependency.
684 // We also specialize the new.target of JSConstruct {node}s if it refers
685 // to the same node as the {node}'s target input, so that we can later
686 // properly inline the JSCreate operations.
687 if (node->opcode() == IrOpcode::kJSConstruct) {
688 // TODO(jgruber, v8:10675): This branch seems unreachable.
689 JSConstructNode n(node);
690 if (inputs[n.TargetIndex()] == inputs[n.NewTargetIndex()]) {
691 inputs[n.NewTargetIndex()] = target;
692 }
693 }
694 inputs[JSCallOrConstructNode::TargetIndex()] = target;
695 inputs[input_count - 1] = if_successes[i];
696 calls[i] = if_successes[i] =
697 graph()->NewNode(node->op(), input_count, inputs);
698 }
699 }
700
InlineCandidate(Candidate const & candidate,bool small_function)701 Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
702 bool small_function) {
703 int const num_calls = candidate.num_functions;
704 Node* const node = candidate.node;
705 #if V8_ENABLE_WEBASSEMBLY
706 DCHECK_NE(node->opcode(), IrOpcode::kJSWasmCall);
707 #endif // V8_ENABLE_WEBASSEMBLY
708 if (num_calls == 1) {
709 Reduction const reduction = inliner_.ReduceJSCall(node);
710 if (reduction.Changed()) {
711 total_inlined_bytecode_size_ += candidate.bytecode[0].value().length();
712 }
713 return reduction;
714 }
715
716 // Expand the JSCall/JSConstruct node to a subgraph first if
717 // we have multiple known target functions.
718 DCHECK_LT(1, num_calls);
719 Node* calls[kMaxCallPolymorphism + 1];
720 Node* if_successes[kMaxCallPolymorphism];
721 Node* callee = NodeProperties::GetValueInput(node, 0);
722
723 // Setup the inputs for the cloned call nodes.
724 int const input_count = node->InputCount();
725 Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
726 for (int i = 0; i < input_count; ++i) {
727 inputs[i] = node->InputAt(i);
728 }
729
730 // Create the appropriate control flow to dispatch to the cloned calls.
731 CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
732 input_count);
733
734 // Check if we have an exception projection for the call {node}.
735 Node* if_exception = nullptr;
736 if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
737 Node* if_exceptions[kMaxCallPolymorphism + 1];
738 for (int i = 0; i < num_calls; ++i) {
739 if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
740 if_exceptions[i] =
741 graph()->NewNode(common()->IfException(), calls[i], calls[i]);
742 }
743
744 // Morph the {if_exception} projection into a join.
745 Node* exception_control =
746 graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
747 if_exceptions[num_calls] = exception_control;
748 Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
749 num_calls + 1, if_exceptions);
750 Node* exception_value = graph()->NewNode(
751 common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
752 if_exceptions);
753 ReplaceWithValue(if_exception, exception_value, exception_effect,
754 exception_control);
755 }
756
757 // Morph the original call site into a join of the dispatched call sites.
758 Node* control =
759 graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
760 calls[num_calls] = control;
761 Node* effect =
762 graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
763 Node* value =
764 graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
765 num_calls + 1, calls);
766 ReplaceWithValue(node, value, effect, control);
767
768 // Inline the individual, cloned call sites.
769 for (int i = 0; i < num_calls && total_inlined_bytecode_size_ <
770 max_inlined_bytecode_size_absolute_;
771 ++i) {
772 if (candidate.can_inline_function[i] &&
773 (small_function || total_inlined_bytecode_size_ <
774 max_inlined_bytecode_size_cumulative_)) {
775 Node* call = calls[i];
776 Reduction const reduction = inliner_.ReduceJSCall(call);
777 if (reduction.Changed()) {
778 total_inlined_bytecode_size_ += candidate.bytecode[i]->length();
779 // Killing the call node is not strictly necessary, but it is safer to
780 // make sure we do not resurrect the node.
781 call->Kill();
782 }
783 }
784 }
785
786 return Replace(value);
787 }
788
operator ()(const Candidate & left,const Candidate & right) const789 bool JSInliningHeuristic::CandidateCompare::operator()(
790 const Candidate& left, const Candidate& right) const {
791 if (right.frequency.IsUnknown()) {
792 if (left.frequency.IsUnknown()) {
793 // If left and right are both unknown then the ordering is indeterminate,
794 // which breaks strict weak ordering requirements, so we fall back to the
795 // node id as a tie breaker.
796 return left.node->id() > right.node->id();
797 }
798 return true;
799 } else if (left.frequency.IsUnknown()) {
800 return false;
801 } else if (left.frequency.value() > right.frequency.value()) {
802 return true;
803 } else if (left.frequency.value() < right.frequency.value()) {
804 return false;
805 } else {
806 return left.node->id() > right.node->id();
807 }
808 }
809
PrintCandidates()810 void JSInliningHeuristic::PrintCandidates() {
811 StdoutStream os;
812 os << candidates_.size() << " candidate(s) for inlining:" << std::endl;
813 for (const Candidate& candidate : candidates_) {
814 os << "- candidate: " << candidate.node->op()->mnemonic() << " node #"
815 << candidate.node->id() << " with frequency " << candidate.frequency
816 << ", " << candidate.num_functions << " target(s):" << std::endl;
817 for (int i = 0; i < candidate.num_functions; ++i) {
818 SharedFunctionInfoRef shared = candidate.functions[i].has_value()
819 ? candidate.functions[i]->shared()
820 : candidate.shared_info.value();
821 os << " - target: " << shared;
822 if (candidate.bytecode[i].has_value()) {
823 os << ", bytecode size: " << candidate.bytecode[i]->length();
824 if (candidate.functions[i].has_value()) {
825 JSFunctionRef function = candidate.functions[i].value();
826 unsigned inlined_bytecode_size =
827 function.code().GetInlinedBytecodeSize();
828 if (inlined_bytecode_size > 0) {
829 os << ", existing opt code's inlined bytecode size: "
830 << inlined_bytecode_size;
831 }
832 }
833 } else {
834 os << ", no bytecode";
835 }
836 os << std::endl;
837 }
838 }
839 }
840
graph() const841 Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }
842
dependencies() const843 CompilationDependencies* JSInliningHeuristic::dependencies() const {
844 return broker()->dependencies();
845 }
846
common() const847 CommonOperatorBuilder* JSInliningHeuristic::common() const {
848 return jsgraph()->common();
849 }
850
simplified() const851 SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
852 return jsgraph()->simplified();
853 }
854
855 #undef TRACE
856
857 } // namespace compiler
858 } // namespace internal
859 } // namespace v8
860