// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/js-inlining-heuristic.h"

#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/compiler-source-position-table.h"
#include "src/compiler/js-heap-broker.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/simplified-operator.h"
#include "src/objects/objects-inl.h"

namespace v8 {
namespace internal {
namespace compiler {

#define TRACE(...)                                                             \
  do {                                                                         \
    if (FLAG_trace_turbo_inlining) StdoutStream{} << __VA_ARGS__ << std::endl; \
  } while (false)

namespace {
bool IsSmall(int const size) {
  return size <= FLAG_max_inlined_bytecode_size_small;
}

bool CanConsiderForInlining(JSHeapBroker* broker,
                            SharedFunctionInfoRef const& shared,
                            FeedbackVectorRef const& feedback_vector) {
  SharedFunctionInfo::Inlineability inlineability = shared.GetInlineability();
  if (inlineability != SharedFunctionInfo::kIsInlineable) {
    TRACE("Cannot consider "
          << shared << " for inlining (reason: " << inlineability << ")");
    return false;
  }

  DCHECK(shared.HasBytecodeArray());
  if (!broker->IsSerializedForCompilation(shared, feedback_vector)) {
    TRACE_BROKER_MISSING(
        broker, "data for " << shared << " (not serialized for compilation)");
    TRACE("Cannot consider " << shared << " for inlining with "
                             << feedback_vector << " (missing data)");
    return false;
  }

  TRACE("Considering " << shared << " for inlining with " << feedback_vector);
  return true;
}

bool CanConsiderForInlining(JSHeapBroker* broker,
                            JSFunctionRef const& function) {
  if (!function.has_feedback_vector()) {
    TRACE("Cannot consider " << function
                             << " for inlining (no feedback vector)");
    return false;
  }

  if (!function.serialized()) {
    TRACE_BROKER_MISSING(
        broker, "data for " << function << " (cannot consider for inlining)");
    TRACE("Cannot consider " << function << " for inlining (missing data)");
    return false;
  }
  return CanConsiderForInlining(broker, function.shared(),
                                function.feedback_vector());
}

}  // namespace

JSInliningHeuristic::Candidate JSInliningHeuristic::CollectFunctions(
    Node* node, int functions_size) {
  DCHECK_NE(0, functions_size);
  Node* callee = node->InputAt(0);
  Candidate out;
  out.node = node;

  HeapObjectMatcher m(callee);
  if (m.HasResolvedValue() && m.Ref(broker()).IsJSFunction()) {
    out.functions[0] = m.Ref(broker()).AsJSFunction();
    JSFunctionRef function = out.functions[0].value();
    if (CanConsiderForInlining(broker(), function)) {
      out.bytecode[0] = function.shared().GetBytecodeArray();
      out.num_functions = 1;
      return out;
    }
  }
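  // A phi as the callee indicates a polymorphic call site; collect each
  // potential target, and give up if any of them is not a known JSFunction
  // or if there are more targets than {functions_size} allows.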
  if (m.IsPhi()) {
    int const value_input_count = m.node()->op()->ValueInputCount();
    if (value_input_count > functions_size) {
      out.num_functions = 0;
      return out;
    }
    for (int n = 0; n < value_input_count; ++n) {
      HeapObjectMatcher m(callee->InputAt(n));
      if (!m.HasResolvedValue() || !m.Ref(broker()).IsJSFunction()) {
        out.num_functions = 0;
        return out;
      }

      out.functions[n] = m.Ref(broker()).AsJSFunction();
      JSFunctionRef function = out.functions[n].value();
      if (CanConsiderForInlining(broker(), function)) {
        out.bytecode[n] = function.shared().GetBytecodeArray();
      }
    }
    out.num_functions = value_input_count;
    return out;
  }
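  // A CheckClosure node does not pin down a concrete target function, but it
  // carries the feedback cell, from which the shared function info (and, if
  // already available, the feedback vector) can be recovered.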
  if (m.IsCheckClosure()) {
    DCHECK(!out.functions[0].has_value());
    FeedbackCellRef feedback_cell(broker(), FeedbackCellOf(m.op()));
    SharedFunctionInfoRef shared_info =
        feedback_cell.shared_function_info().value();
    out.shared_info = shared_info;
    if (feedback_cell.value().IsFeedbackVector() &&
        CanConsiderForInlining(broker(), shared_info,
                               feedback_cell.value().AsFeedbackVector())) {
      out.bytecode[0] = shared_info.GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
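  // Similarly, a JSCreateClosure node directly provides the shared function
  // info of the closure being created together with its feedback cell.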
  if (m.IsJSCreateClosure()) {
    DCHECK(!out.functions[0].has_value());
    JSCreateClosureNode n(callee);
    CreateClosureParameters const& p = n.Parameters();
    FeedbackCellRef feedback_cell = n.GetFeedbackCellRefChecked(broker());
    SharedFunctionInfoRef shared_info(broker(), p.shared_info());
    out.shared_info = shared_info;
    if (feedback_cell.value().IsFeedbackVector() &&
        CanConsiderForInlining(broker(), shared_info,
                               feedback_cell.value().AsFeedbackVector())) {
      out.bytecode[0] = shared_info.GetBytecodeArray();
    }
    out.num_functions = 1;
    return out;
  }
  out.num_functions = 0;
  return out;
}

Reduction JSInliningHeuristic::Reduce(Node* node) {
  DisallowHeapAccessIf no_heap_access(broker()->is_concurrent_inlining());

  if (!IrOpcode::IsInlineeOpcode(node->opcode())) return NoChange();

  if (total_inlined_bytecode_size_ >= FLAG_max_inlined_bytecode_size_absolute) {
    return NoChange();
  }

  // Check if we already saw that {node} before, and if so, just skip it.
  if (seen_.find(node->id()) != seen_.end()) return NoChange();
  seen_.insert(node->id());

  // Check if the {node} is an appropriate candidate for inlining.
  Candidate candidate = CollectFunctions(node, kMaxCallPolymorphism);
  if (candidate.num_functions == 0) {
    return NoChange();
  } else if (candidate.num_functions > 1 && !FLAG_polymorphic_inlining) {
    TRACE("Not considering call site #"
          << node->id() << ":" << node->op()->mnemonic()
          << ", because polymorphic inlining is disabled");
    return NoChange();
  }

  bool can_inline_candidate = false, candidate_is_small = true;
  candidate.total_size = 0;
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  FrameStateInfo const& frame_info = FrameStateInfoOf(frame_state->op());
  Handle<SharedFunctionInfo> frame_shared_info;
  for (int i = 0; i < candidate.num_functions; ++i) {
    if (!candidate.bytecode[i].has_value()) {
      candidate.can_inline_function[i] = false;
      continue;
    }

    SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                       ? candidate.functions[i].value().shared()
                                       : candidate.shared_info.value();
    candidate.can_inline_function[i] = candidate.bytecode[i].has_value();
    CHECK_IMPLIES(candidate.can_inline_function[i], shared.IsInlineable());
    // Do not allow direct recursion, i.e. f() -> f(). We still allow indirect
    // recursion like f() -> g() -> f(). The indirect recursion is helpful in
    // cases where f() is a small dispatch function that calls the appropriate
    // function. In the case of direct recursion, we only have some static
    // information for the first level of inlining, so it may not be that
    // useful to inline just one level of a recursive call. In some cases like
    // tail recursion we may benefit from recursive inlining, if we have
    // additional analysis that converts them to iterative implementations,
    // though it is not obvious whether such an analysis is needed.
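    // (Illustrative sketch: when optimizing
    //  `function f(n) { return n && f(n - 1); }`, the self-call f() -> f() is
    //  rejected here, whereas a cycle through a small dispatcher,
    //  f() -> g() -> f(), remains eligible.)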
    if (frame_info.shared_info().ToHandle(&frame_shared_info) &&
        frame_shared_info.equals(shared.object())) {
      TRACE("Not considering call site #" << node->id() << ":"
                                          << node->op()->mnemonic()
                                          << ", because of recursive inlining");
      candidate.can_inline_function[i] = false;
    }
    if (candidate.can_inline_function[i]) {
      can_inline_candidate = true;
      BytecodeArrayRef bytecode = candidate.bytecode[i].value();
      candidate.total_size += bytecode.length();
      unsigned inlined_bytecode_size = 0;
      if (candidate.functions[i].has_value()) {
        JSFunctionRef function = candidate.functions[i].value();
        if (function.HasAttachedOptimizedCode()) {
          inlined_bytecode_size = function.code().inlined_bytecode_size();
          candidate.total_size += inlined_bytecode_size;
        }
      }
      candidate_is_small = candidate_is_small &&
                           IsSmall(bytecode.length() + inlined_bytecode_size);
    }
  }
  if (!can_inline_candidate) return NoChange();

  // Gather feedback on how often this call site has been hit before.
  if (node->opcode() == IrOpcode::kJSCall) {
    CallParameters const p = CallParametersOf(node->op());
    candidate.frequency = p.frequency();
  } else {
    ConstructParameters const p = ConstructParametersOf(node->op());
    candidate.frequency = p.frequency();
  }

  // Don't consider a {candidate} whose frequency is below the
  // threshold, i.e. a call site that is only hit once every N
  // invocations of the caller.
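  // (For example, a frequency of 0.25 means the call site executes roughly
  // once per four invocations of the caller; if that falls below
  // FLAG_min_inlining_frequency, the candidate is dropped.)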
  if (candidate.frequency.IsKnown() &&
      candidate.frequency.value() < FLAG_min_inlining_frequency) {
    return NoChange();
  }

  // Forcibly inline small functions here. In the case of polymorphic inlining
  // candidate_is_small is set only when all functions are small.
  if (candidate_is_small) {
    TRACE("Inlining small function(s) at call site #"
          << node->id() << ":" << node->op()->mnemonic());
    return InlineCandidate(candidate, true);
  }

  // In the general case we remember the candidate for later.
  candidates_.insert(candidate);
  return NoChange();
}

void JSInliningHeuristic::Finalize() {
  DisallowHeapAccessIf no_heap_access(broker()->is_concurrent_inlining());

  if (candidates_.empty()) return;  // Nothing to do without candidates.
  if (FLAG_trace_turbo_inlining) PrintCandidates();

  // We inline at most one candidate in every iteration of the fixpoint.
  // This is to ensure that we don't consume the full inlining budget
  // on things that aren't called very often.
  // TODO(bmeurer): Use std::priority_queue instead of std::set here.
  while (!candidates_.empty()) {
    auto i = candidates_.begin();
    Candidate candidate = *i;
    candidates_.erase(i);

    // Ignore this candidate if it's no longer valid.
    if (!IrOpcode::IsInlineeOpcode(candidate.node->opcode())) continue;
    if (candidate.node->IsDead()) continue;

    // Make sure we have some extra budget left, so that any small functions
    // exposed by this function would be given a chance to inline.
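    // (Sketch, with an assumed scale factor of 1.3: a candidate whose
    // total_size is 1000 is only inlined now if total_inlined_bytecode_size_
    // plus the reserved 1300 bytes still fits within
    // FLAG_max_inlined_bytecode_size_cumulative.)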
    double size_of_candidate =
        candidate.total_size * FLAG_reserve_inline_budget_scale_factor;
    int total_size =
        total_inlined_bytecode_size_ + static_cast<int>(size_of_candidate);
    if (total_size > FLAG_max_inlined_bytecode_size_cumulative) {
      // Check whether any smaller functions are available to inline.
      continue;
    }

    Reduction const reduction = InlineCandidate(candidate, false);
    if (reduction.Changed()) return;
  }
}

namespace {

struct NodeAndIndex {
  Node* node;
  int index;
};

bool CollectStateValuesOwnedUses(Node* node, Node* state_values,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (state_values->UseCount() > 1) return true;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    if (input->opcode() == IrOpcode::kStateValues) {
      if (!CollectStateValuesOwnedUses(node, input, uses_buffer, use_count,
                                       max_uses)) {
        return false;
      }
    } else if (input == node) {
      if (*use_count >= max_uses) return false;
      uses_buffer[*use_count] = {state_values, i};
      (*use_count)++;
    }
  }
  return true;
}

}  // namespace

Node* JSInliningHeuristic::DuplicateStateValuesAndRename(Node* state_values,
                                                         Node* from, Node* to,
                                                         StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectStateValuesOwnedUses}.
  if (state_values->UseCount() > 1) return state_values;
  Node* copy = mode == kChangeInPlace ? state_values : nullptr;
  for (int i = 0; i < state_values->InputCount(); i++) {
    Node* input = state_values->InputAt(i);
    Node* processed;
    if (input->opcode() == IrOpcode::kStateValues) {
      processed = DuplicateStateValuesAndRename(input, from, to, mode);
    } else if (input == from) {
      processed = to;
    } else {
      processed = input;
    }
    if (processed != input) {
      if (!copy) {
        copy = graph()->CloneNode(state_values);
      }
      copy->ReplaceInput(i, processed);
    }
  }
  return copy ? copy : state_values;
}

namespace {

bool CollectFrameStateUniqueUses(Node* node, Node* frame_state,
                                 NodeAndIndex* uses_buffer, size_t* use_count,
                                 size_t max_uses) {
  // Only accumulate states that are not shared with other users.
  if (frame_state->UseCount() > 1) return true;
  if (frame_state->InputAt(kFrameStateStackInput) == node) {
    if (*use_count >= max_uses) return false;
    uses_buffer[*use_count] = {frame_state, kFrameStateStackInput};
    (*use_count)++;
  }
  if (!CollectStateValuesOwnedUses(node,
                                   frame_state->InputAt(kFrameStateLocalsInput),
                                   uses_buffer, use_count, max_uses)) {
    return false;
  }
  return true;
}

}  // namespace

Node* JSInliningHeuristic::DuplicateFrameStateAndRename(Node* frame_state,
                                                        Node* from, Node* to,
                                                        StateCloneMode mode) {
  // Only rename in states that are not shared with other users. This needs to
  // be in sync with the condition in {CollectFrameStateUniqueUses}.
  if (frame_state->UseCount() > 1) return frame_state;
  Node* copy = mode == kChangeInPlace ? frame_state : nullptr;
  if (frame_state->InputAt(kFrameStateStackInput) == from) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateStackInput, to);
  }
  Node* locals = frame_state->InputAt(kFrameStateLocalsInput);
  Node* new_locals = DuplicateStateValuesAndRename(locals, from, to, mode);
  if (new_locals != locals) {
    if (!copy) {
      copy = graph()->CloneNode(frame_state);
    }
    copy->ReplaceInput(kFrameStateLocalsInput, new_locals);
  }
  return copy ? copy : frame_state;
}

bool JSInliningHeuristic::TryReuseDispatch(Node* node, Node* callee,
                                           Node** if_successes, Node** calls,
                                           Node** inputs, int input_count) {
  // We will try to reuse the control flow branch created for computing
  // the {callee} target of the call. We only reuse the branch if there
  // is no side-effect between the call and the branch, and if the callee is
  // only used as the target (and possibly also in the related frame states).

  // We are trying to match the following pattern:
  //
  // C1 C2
  // . .
  // | |
  // Merge(merge) <-----------------+
  // ^ ^ |
  // V1 V2 | | E1 E2 |
  // . . | +----+ . . |
  // | | | | | | |
  // Phi(callee) EffectPhi(effect_phi) |
  // ^ ^ |
  // | | |
  // +----+ | |
  // | | | |
  // | StateValues | |
  // | ^ | |
  // +----+ | | |
  // | | | | |
  // | FrameState | |
  // | ^ | |
  // | | | +---+
  // | | | | |
  // +----+ Checkpoint(checkpoint) |
  // | | ^ |
  // | StateValues | +-------------+
  // | | | |
  // +-----+ | | |
  // | | | | |
  // | FrameState | |
  // | ^ | |
  // +-----------+ | | |
  // Call(node)
  // |
  // |
  // .
  //
  // The {callee} here is a phi that merges the possible call targets, {node}
  // is the actual call that we will try to duplicate and connect to the
  // control that comes into {merge}. There can be a {checkpoint} between
  // the call and the callee phi.
  //
  // The idea is to get rid of the merge, effect phi and phi, then duplicate
  // the call (with all the frame states and such), and connect the duplicated
  // calls and states directly to the inputs of the ex-phi, ex-effect-phi and
  // ex-merge. The tricky part is to make sure that there is no interference
  // from the outside. In particular, there should not be any unaccounted uses
  // of the phi, effect-phi and merge because we will remove them from
  // the graph.
  //
  // V1 E1 C1 V2 E2 C2
  // . . . . . .
  // | | | | | |
  // +----+ | | +----+ |
  // | | | | | | |
  // | StateValues | | | StateValues |
  // | ^ | | | ^ |
  // +----+ | | | +----+ | |
  // | | | | | | | | |
  // | FrameState | | | FrameState |
  // | ^ | | | ^ |
  // | | | | | | |
  // | | | | | | |
  // +----+ Checkpoint | +----+ Checkpoint |
  // | | ^ | | | ^ |
  // | StateValues | | | StateValues | |
  // | | | | | | | |
  // +-----+ | | | +-----+ | | |
  // | | | | | | | | | |
  // | FrameState | | | FrameState | |
  // | ^ | | | ^ | |
  // +-------------+| | | +-------------+| | |
  // Call----+ Call----+
  // | |
  // +-------+ +------------+
  // | |
  // Merge
  // EffectPhi
  // Phi
  // |
  // ...
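  //
  // (A hypothetical JS shape that produces the matched pattern:
  //    const f = cond ? a : b;   // ternary introduces the merge and the phi
  //    f(x);                     // call whose target is the {callee} phi
  //  where a and b are known functions, so the phi inputs are constants.)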

  // Bailout if the call is not polymorphic anymore (other reducers might
  // have replaced the callee phi with a constant).
  if (callee->opcode() != IrOpcode::kPhi) return false;
  int const num_calls = callee->op()->ValueInputCount();

  // If there is a control node between the callee computation
  // and the call, bail out.
  Node* merge = NodeProperties::GetControlInput(callee);
  if (NodeProperties::GetControlInput(node) != merge) return false;

  // If there is a non-checkpoint effect node between the callee computation
  // and the call, bail out. We will drop any checkpoint between the call and
  // the callee phi because the callee computation should have its own
  // checkpoint that the call can fall back to.
  Node* checkpoint = nullptr;
  Node* effect = NodeProperties::GetEffectInput(node);
  if (effect->opcode() == IrOpcode::kCheckpoint) {
    checkpoint = effect;
    if (NodeProperties::GetControlInput(checkpoint) != merge) return false;
    effect = NodeProperties::GetEffectInput(effect);
  }
  if (effect->opcode() != IrOpcode::kEffectPhi) return false;
  if (NodeProperties::GetControlInput(effect) != merge) return false;
  Node* effect_phi = effect;

  // The effect phi, the callee, the call and the checkpoint must be the only
  // users of the merge.
  for (Node* merge_use : merge->uses()) {
    if (merge_use != effect_phi && merge_use != callee && merge_use != node &&
        merge_use != checkpoint) {
      return false;
    }
  }

  // The effect phi must be only used by the checkpoint or the call.
  for (Node* effect_phi_use : effect_phi->uses()) {
    if (effect_phi_use != node && effect_phi_use != checkpoint) return false;
  }

  // We must replace the callee phi with the appropriate constant in
  // the entire subgraph reachable by inputs from the call (terminating
  // at phis and merges). Since we do not want to walk (and later duplicate)
  // the subgraph here, we limit the possible uses to this set:
  //
  // 1. In the call (as a target).
  // 2. The checkpoint between the call and the callee computation merge.
  // 3. The lazy deoptimization frame state.
  //
  // This corresponds to the most common pattern, where the function is
  // called with only local variables or constants as arguments.
  //
  // To check the uses, we first collect all the occurrences of callee in 1, 2
  // and 3, and then we check that all uses of callee are in the collected
  // occurrences. If there is an unaccounted use, we do not try to rewire
  // the control flow.
  //
  // Note: With CFG, this would be much easier and more robust - we would just
  // duplicate all the nodes between the merge and the call, replacing all
  // occurrences of the {callee} phi with the appropriate constant.

  // First compute the set of uses that are only reachable from 2 and 3.
  const size_t kMaxUses = 8;
  NodeAndIndex replaceable_uses[kMaxUses];
  size_t replaceable_uses_count = 0;

  // Collect the uses to check case 2.
  Node* checkpoint_state = nullptr;
  if (checkpoint) {
    checkpoint_state = checkpoint->InputAt(0);
    if (!CollectFrameStateUniqueUses(callee, checkpoint_state, replaceable_uses,
                                     &replaceable_uses_count, kMaxUses)) {
      return false;
    }
  }

  // Collect the uses to check case 3.
  Node* frame_state = NodeProperties::GetFrameStateInput(node);
  if (!CollectFrameStateUniqueUses(callee, frame_state, replaceable_uses,
                                   &replaceable_uses_count, kMaxUses)) {
    return false;
  }

  // Bail out if there is a use of {callee} that is not reachable from 1, 2
  // and 3.
  for (Edge edge : callee->use_edges()) {
    // Case 1 (use by the call as a target).
    if (edge.from() == node && edge.index() == 0) continue;
    // Case 2 and 3 - used in checkpoint and/or lazy deopt frame states.
    bool found = false;
    for (size_t i = 0; i < replaceable_uses_count; i++) {
      if (replaceable_uses[i].node == edge.from() &&
          replaceable_uses[i].index == edge.index()) {
        found = true;
        break;
      }
    }
    if (!found) return false;
  }

  // Clone the call and the framestate, including the uniquely reachable
  // state values, making sure that we replace the phi with the constant.
  for (int i = 0; i < num_calls; ++i) {
    // Clone the calls for each branch.
    // We need to specialize the calls to the correct target, effect, and
    // control. We also need to duplicate the checkpoint and the lazy
    // frame state, and change all the uses of the callee to the constant
    // callee.
    Node* target = callee->InputAt(i);
    Node* effect = effect_phi->InputAt(i);
    Node* control = merge->InputAt(i);

    if (checkpoint) {
      // Duplicate the checkpoint.
      Node* new_checkpoint_state = DuplicateFrameStateAndRename(
          checkpoint_state, callee, target,
          (i == num_calls - 1) ? kChangeInPlace : kCloneState);
      effect = graph()->NewNode(checkpoint->op(), new_checkpoint_state, effect,
                                control);
    }

    // Duplicate the call.
    Node* new_lazy_frame_state = DuplicateFrameStateAndRename(
        frame_state, callee, target,
        (i == num_calls - 1) ? kChangeInPlace : kCloneState);
    inputs[0] = target;
    inputs[input_count - 3] = new_lazy_frame_state;
    inputs[input_count - 2] = effect;
    inputs[input_count - 1] = control;
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }

  // Mark the control inputs dead, so that we can kill the merge.
  node->ReplaceInput(input_count - 1, jsgraph()->Dead());
  callee->ReplaceInput(num_calls, jsgraph()->Dead());
  effect_phi->ReplaceInput(num_calls, jsgraph()->Dead());
  if (checkpoint) {
    checkpoint->ReplaceInput(2, jsgraph()->Dead());
  }

  merge->Kill();
  return true;
}

void JSInliningHeuristic::CreateOrReuseDispatch(Node* node, Node* callee,
                                                Candidate const& candidate,
                                                Node** if_successes,
                                                Node** calls, Node** inputs,
                                                int input_count) {
  SourcePositionTable::Scope position(
      source_positions_, source_positions_->GetSourcePosition(node));
  if (TryReuseDispatch(node, callee, if_successes, calls, inputs,
                       input_count)) {
    return;
  }

  STATIC_ASSERT(JSCallOrConstructNode::kHaveIdenticalLayouts);

  Node* fallthrough_control = NodeProperties::GetControlInput(node);
  int const num_calls = candidate.num_functions;

  // Create the appropriate control flow to dispatch to the cloned calls.
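  // Conceptually, the loop below emits a chain of ReferenceEqual checks:
  //   if (callee == target[0])      -> call target[0]
  //   else if (callee == target[1]) -> call target[1]
  //   ...
  //   else                          -> call target[num_calls - 1]
  // The last target is reached on the final fallthrough without a check.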
  for (int i = 0; i < num_calls; ++i) {
    // TODO(2206): Make comparison be based on underlying SharedFunctionInfo
    // instead of the target JSFunction reference directly.
    Node* target = jsgraph()->Constant(candidate.functions[i].value());
    if (i != (num_calls - 1)) {
      Node* check =
          graph()->NewNode(simplified()->ReferenceEqual(), callee, target);
      Node* branch =
          graph()->NewNode(common()->Branch(), check, fallthrough_control);
      fallthrough_control = graph()->NewNode(common()->IfFalse(), branch);
      if_successes[i] = graph()->NewNode(common()->IfTrue(), branch);
    } else {
      if_successes[i] = fallthrough_control;
    }

    // Clone the calls for each branch.
    // The first input to the call is the actual target (which we specialize
    // to the known {target}); the last input is the control dependency.
    // We also specialize the new.target of JSConstruct {node}s if it refers
    // to the same node as the {node}'s target input, so that we can later
    // properly inline the JSCreate operations.
    if (node->opcode() == IrOpcode::kJSConstruct) {
      // TODO(jgruber, v8:10675): This branch seems unreachable.
      JSConstructNode n(node);
      if (inputs[n.TargetIndex()] == inputs[n.NewTargetIndex()]) {
        inputs[n.NewTargetIndex()] = target;
      }
    }
    inputs[JSCallOrConstructNode::TargetIndex()] = target;
    inputs[input_count - 1] = if_successes[i];
    calls[i] = if_successes[i] =
        graph()->NewNode(node->op(), input_count, inputs);
  }
}

Reduction JSInliningHeuristic::InlineCandidate(Candidate const& candidate,
                                               bool small_function) {
  int const num_calls = candidate.num_functions;
  Node* const node = candidate.node;
  if (num_calls == 1) {
    Reduction const reduction = inliner_.ReduceJSCall(node);
    if (reduction.Changed()) {
      total_inlined_bytecode_size_ += candidate.bytecode[0].value().length();
    }
    return reduction;
  }

  // Expand the JSCall/JSConstruct node to a subgraph first if
  // we have multiple known target functions.
  DCHECK_LT(1, num_calls);
  Node* calls[kMaxCallPolymorphism + 1];
  Node* if_successes[kMaxCallPolymorphism];
  Node* callee = NodeProperties::GetValueInput(node, 0);

  // Setup the inputs for the cloned call nodes.
  int const input_count = node->InputCount();
  Node** inputs = graph()->zone()->NewArray<Node*>(input_count);
  for (int i = 0; i < input_count; ++i) {
    inputs[i] = node->InputAt(i);
  }

  // Create the appropriate control flow to dispatch to the cloned calls.
  CreateOrReuseDispatch(node, callee, candidate, if_successes, calls, inputs,
                        input_count);

  // Check if we have an exception projection for the call {node}.
  Node* if_exception = nullptr;
  if (NodeProperties::IsExceptionalCall(node, &if_exception)) {
    Node* if_exceptions[kMaxCallPolymorphism + 1];
    for (int i = 0; i < num_calls; ++i) {
      if_successes[i] = graph()->NewNode(common()->IfSuccess(), calls[i]);
      if_exceptions[i] =
          graph()->NewNode(common()->IfException(), calls[i], calls[i]);
    }

    // Morph the {if_exception} projection into a join.
    Node* exception_control =
        graph()->NewNode(common()->Merge(num_calls), num_calls, if_exceptions);
    if_exceptions[num_calls] = exception_control;
    Node* exception_effect = graph()->NewNode(common()->EffectPhi(num_calls),
                                              num_calls + 1, if_exceptions);
    Node* exception_value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTagged, num_calls), num_calls + 1,
        if_exceptions);
    ReplaceWithValue(if_exception, exception_value, exception_effect,
                     exception_control);
  }

  // Morph the original call site into a join of the dispatched call sites.
  Node* control =
      graph()->NewNode(common()->Merge(num_calls), num_calls, if_successes);
  calls[num_calls] = control;
  Node* effect =
      graph()->NewNode(common()->EffectPhi(num_calls), num_calls + 1, calls);
  Node* value =
      graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, num_calls),
                       num_calls + 1, calls);
  ReplaceWithValue(node, value, effect, control);

  // Inline the individual, cloned call sites.
  for (int i = 0; i < num_calls && total_inlined_bytecode_size_ <
                                       FLAG_max_inlined_bytecode_size_absolute;
       ++i) {
    if (candidate.can_inline_function[i] &&
        (small_function || total_inlined_bytecode_size_ <
                               FLAG_max_inlined_bytecode_size_cumulative)) {
      Node* node = calls[i];
      Reduction const reduction = inliner_.ReduceJSCall(node);
      if (reduction.Changed()) {
        total_inlined_bytecode_size_ += candidate.bytecode[i]->length();
        // Killing the call node is not strictly necessary, but it is safer to
        // make sure we do not resurrect the node.
        node->Kill();
      }
    }
  }

  return Replace(value);
}

bool JSInliningHeuristic::CandidateCompare::operator()(
    const Candidate& left, const Candidate& right) const {
  if (right.frequency.IsUnknown()) {
    if (left.frequency.IsUnknown()) {
      // If left and right are both unknown then the ordering is indeterminate,
      // which breaks strict weak ordering requirements, so we fall back to the
      // node id as a tie breaker.
      return left.node->id() > right.node->id();
    }
    return true;
  } else if (left.frequency.IsUnknown()) {
    return false;
  } else if (left.frequency.value() > right.frequency.value()) {
    return true;
  } else if (left.frequency.value() < right.frequency.value()) {
    return false;
  } else {
    return left.node->id() > right.node->id();
  }
}

void JSInliningHeuristic::PrintCandidates() {
  StdoutStream os;
  os << candidates_.size() << " candidate(s) for inlining:" << std::endl;
  for (const Candidate& candidate : candidates_) {
    os << "- candidate: " << candidate.node->op()->mnemonic() << " node #"
       << candidate.node->id() << " with frequency " << candidate.frequency
       << ", " << candidate.num_functions << " target(s):" << std::endl;
    for (int i = 0; i < candidate.num_functions; ++i) {
      SharedFunctionInfoRef shared = candidate.functions[i].has_value()
                                         ? candidate.functions[i]->shared()
                                         : candidate.shared_info.value();
      os << " - target: " << shared;
      if (candidate.bytecode[i].has_value()) {
        os << ", bytecode size: " << candidate.bytecode[i]->length();
        if (candidate.functions[i].has_value()) {
          JSFunctionRef function = candidate.functions[i].value();
          if (function.HasAttachedOptimizedCode()) {
            os << ", existing opt code's inlined bytecode size: "
               << function.code().inlined_bytecode_size();
          }
        }
      } else {
        os << ", no bytecode";
      }
      os << std::endl;
    }
  }
}

Graph* JSInliningHeuristic::graph() const { return jsgraph()->graph(); }

CommonOperatorBuilder* JSInliningHeuristic::common() const {
  return jsgraph()->common();
}

SimplifiedOperatorBuilder* JSInliningHeuristic::simplified() const {
  return jsgraph()->simplified();
}

#undef TRACE

}  // namespace compiler
}  // namespace internal
}  // namespace v8