//! Inlining pass for MIR functions
use crate::deref_separator::deref_finder;
use rustc_attr::InlineAttr;
use rustc_hir::def_id::DefId;
use rustc_index::bit_set::BitSet;
use rustc_index::Idx;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
use rustc_middle::mir::visit::*;
use rustc_middle::mir::*;
use rustc_middle::ty::TypeVisitableExt;
use rustc_middle::ty::{self, Instance, InstanceDef, ParamEnv, Ty, TyCtxt};
use rustc_session::config::OptLevel;
use rustc_target::abi::{FieldIdx, FIRST_VARIANT};
use rustc_target::spec::abi::Abi;

use crate::simplify::{remove_dead_blocks, CfgSimplifier};
use crate::util;
use crate::MirPass;
use std::iter;
use std::ops::{Range, RangeFrom};

pub(crate) mod cycle;

/// Cost of an ordinary MIR statement or simple terminator.
const INSTR_COST: usize = 5;
/// Extra cost of a call or call-like terminator.
const CALL_PENALTY: usize = 25;
/// Extra cost of a terminator with a cleanup (landing pad) unwind edge.
const LANDINGPAD_PENALTY: usize = 50;
/// Cost of a `Resume` terminator.
const RESUME_PENALTY: usize = 45;

/// At inlining depths from 1 up to this limit, only one callsite per body is inlined; beyond it,
/// none are. This bounds top-down growth of the MIR.
const TOP_DOWN_DEPTH_LIMIT: usize = 5;

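/// The MIR inlining pass: copies callee bodies into their call sites when this is judged
/// profitable.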
pub struct Inline;

#[derive(Copy, Clone, Debug)]
struct CallSite<'tcx> {
    /// The instance being called.
    callee: Instance<'tcx>,
    /// Signature of the callee.
    fn_sig: ty::PolyFnSig<'tcx>,
    /// Basic block whose terminator is the call.
    block: BasicBlock,
    /// Block the call returns to, if it can return.
    target: Option<BasicBlock>,
    /// Source info of the call terminator, with the call's span.
    source_info: SourceInfo,
}

impl<'tcx> MirPass<'tcx> for Inline {
    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
        if let Some(enabled) = sess.opts.unstable_opts.inline_mir {
            return enabled;
        }

        match sess.mir_opt_level() {
            0 | 1 => false,
            2 => {
                (sess.opts.optimize == OptLevel::Default
                    || sess.opts.optimize == OptLevel::Aggressive)
                    && sess.opts.incremental == None
            }
            _ => true,
        }
    }

    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
        let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id()));
        let _guard = span.enter();
        if inline(tcx, body) {
            debug!("running simplify cfg on {:?}", body.source);
            CfgSimplifier::new(body).simplify();
            remove_dead_blocks(tcx, body);
            deref_finder(tcx, body);
        }
    }
}

fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
    let def_id = body.source.def_id().expect_local();

    // Only do inlining into fn bodies.
    if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() {
        return false;
    }
    if body.source.promoted.is_some() {
        return false;
    }
    // Avoid inlining into generators, since their `optimized_mir` is used for layout computation,
    // which can create a cycle, even when no attempt is made to inline the function in the other
    // direction.
    if body.generator.is_some() {
        return false;
    }

    let param_env = tcx.param_env_reveal_all_normalized(def_id);

    let mut this = Inliner {
        tcx,
        param_env,
        codegen_fn_attrs: tcx.codegen_fn_attrs(def_id),
        history: Vec::new(),
        changed: false,
    };
    let blocks = START_BLOCK..body.basic_blocks.next_index();
    this.process_blocks(body, blocks);
    this.changed
}

struct Inliner<'tcx> {
    tcx: TyCtxt<'tcx>,
    param_env: ParamEnv<'tcx>,
    /// Caller codegen attributes.
    codegen_fn_attrs: &'tcx CodegenFnAttrs,
    /// Stack of inlined instances.
    /// We only check the `DefId` and not the substs because we want to
    /// avoid inlining cases of polymorphic recursion.
    /// The number of `DefId`s is finite, so checking history is enough
    /// to ensure that we do not loop endlessly while inlining.
    history: Vec<DefId>,
    /// Indicates that the caller body has been modified.
    changed: bool,
}

impl<'tcx> Inliner<'tcx> {
    fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) {
        // How many callsites in this body are we allowed to inline? We need to limit this in order
        // to prevent super-linear growth in MIR size.
        let inline_limit = match self.history.len() {
            0 => usize::MAX,
            1..=TOP_DOWN_DEPTH_LIMIT => 1,
            _ => return,
        };
        let mut inlined_count = 0;
        for bb in blocks {
            let bb_data = &caller_body[bb];
            if bb_data.is_cleanup {
                continue;
            }

            let Some(callsite) = self.resolve_callsite(caller_body, bb, bb_data) else {
                continue;
            };

            let span = trace_span!("process_blocks", %callsite.callee, ?bb);
            let _guard = span.enter();

            match self.try_inlining(caller_body, &callsite) {
                Err(reason) => {
                    debug!("not-inlined {} [{}]", callsite.callee, reason);
                    continue;
                }
                Ok(new_blocks) => {
                    debug!("inlined {}", callsite.callee);
                    self.changed = true;

                    self.history.push(callsite.callee.def_id());
                    self.process_blocks(caller_body, new_blocks);
                    self.history.pop();

                    inlined_count += 1;
                    if inlined_count == inline_limit {
                        debug!("inline count reached");
                        return;
                    }
                }
            }
        }
    }

    /// Attempts to inline a callsite into the caller body. When successful returns basic blocks
    /// containing the inlined body. Otherwise returns an error describing why inlining didn't take
    /// place.
    fn try_inlining(
        &self,
        caller_body: &mut Body<'tcx>,
        callsite: &CallSite<'tcx>,
    ) -> Result<std::ops::Range<BasicBlock>, &'static str> {
        let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id());
        self.check_codegen_attributes(callsite, callee_attrs)?;

        let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
        let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
        let destination_ty = destination.ty(&caller_body.local_decls, self.tcx).ty;
        for arg in args {
            if !arg.ty(&caller_body.local_decls, self.tcx).is_sized(self.tcx, self.param_env) {
                // We do not allow inlining functions with unsized params. Inlining these functions
                // could create unsized locals, which are unsound and being phased out.
                return Err("Call has unsized argument");
            }
        }

        self.check_mir_is_available(caller_body, &callsite.callee)?;
        let callee_body = try_instance_mir(self.tcx, callsite.callee.def)?;
        self.check_mir_body(callsite, callee_body, callee_attrs)?;

        if !self.tcx.consider_optimizing(|| {
            format!("Inline {:?} into {:?}", callsite.callee, caller_body.source)
        }) {
            return Err("optimization fuel exhausted");
        }

        let Ok(callee_body) = callsite.callee.try_subst_mir_and_normalize_erasing_regions(
            self.tcx,
            self.param_env,
            ty::EarlyBinder::bind(callee_body.clone()),
        ) else {
            return Err("failed to normalize callee body");
        };

        // Check call signature compatibility.
        // Normally, this shouldn't be required, but trait normalization failure can create a
        // validation ICE.
        let output_type = callee_body.return_ty();
        if !util::is_subtype(self.tcx, self.param_env, output_type, destination_ty) {
            trace!(?output_type, ?destination_ty);
            return Err("failed to normalize return type");
        }
        if callsite.fn_sig.abi() == Abi::RustCall {
            let (arg_tuple, skipped_args) = match &args[..] {
                [arg_tuple] => (arg_tuple, 0),
                [_, arg_tuple] => (arg_tuple, 1),
                _ => bug!("Expected `rust-call` to have 1 or 2 args"),
            };

            let arg_tuple_ty = arg_tuple.ty(&caller_body.local_decls, self.tcx);
            let ty::Tuple(arg_tuple_tys) = arg_tuple_ty.kind() else {
                bug!("Closure arguments are not passed as a tuple");
            };

            for (arg_ty, input) in
                arg_tuple_tys.iter().zip(callee_body.args_iter().skip(skipped_args))
            {
                let input_type = callee_body.local_decls[input].ty;
                if !util::is_subtype(self.tcx, self.param_env, input_type, arg_ty) {
                    trace!(?arg_ty, ?input_type);
                    return Err("failed to normalize tuple argument type");
                }
            }
        } else {
            for (arg, input) in args.iter().zip(callee_body.args_iter()) {
                let input_type = callee_body.local_decls[input].ty;
                let arg_ty = arg.ty(&caller_body.local_decls, self.tcx);
                if !util::is_subtype(self.tcx, self.param_env, input_type, arg_ty) {
                    trace!(?arg_ty, ?input_type);
                    return Err("failed to normalize argument type");
                }
            }
        }

        let old_blocks = caller_body.basic_blocks.next_index();
        self.inline_call(caller_body, &callsite, callee_body);
        let new_blocks = old_blocks..caller_body.basic_blocks.next_index();

        Ok(new_blocks)
    }

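    /// Checks whether the callee's MIR is available and whether using it could introduce a query
    /// cycle with the caller.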
    fn check_mir_is_available(
        &self,
        caller_body: &Body<'tcx>,
        callee: &Instance<'tcx>,
    ) -> Result<(), &'static str> {
        let caller_def_id = caller_body.source.def_id();
        let callee_def_id = callee.def_id();
        if callee_def_id == caller_def_id {
            return Err("self-recursion");
        }

        match callee.def {
            InstanceDef::Item(_) => {
                // If there is no MIR available (either because it was not in metadata or
                // because it's an extern function with no MIR body), then the inliner
                // won't cause cycles on this.
                if !self.tcx.is_mir_available(callee_def_id) {
                    return Err("item MIR unavailable");
                }
            }
            // These have no own callable MIR.
            InstanceDef::Intrinsic(_) | InstanceDef::Virtual(..) => {
                return Err("instance without MIR (intrinsic / virtual)");
            }
            // This cannot result in an immediate cycle since the callee MIR is a shim, which does
            // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we
            // do not need to catch this here, we can wait until the inliner decides to continue
            // inlining a second time.
            InstanceDef::VTableShim(_)
            | InstanceDef::ReifyShim(_)
            | InstanceDef::FnPtrShim(..)
            | InstanceDef::ClosureOnceShim { .. }
            | InstanceDef::DropGlue(..)
            | InstanceDef::CloneShim(..)
            | InstanceDef::ThreadLocalShim(..)
            | InstanceDef::FnPtrAddrShim(..) => return Ok(()),
        }

        if self.tcx.is_constructor(callee_def_id) {
            trace!("constructors always have MIR");
            // Constructor functions cannot cause a query cycle.
            return Ok(());
        }

        if callee_def_id.is_local() {
            // Avoid a cycle here by only using `instance_mir` if we have a lower `DefPathHash`
            // than the callee. This ensures that the callee will not inline us. This trick even
            // works with incremental compilation, since `DefPathHash` is stable.
            if self.tcx.def_path_hash(caller_def_id).local_hash()
                < self.tcx.def_path_hash(callee_def_id).local_hash()
            {
                return Ok(());
            }

            // If we know for sure that the function we're calling will itself try to
            // call us, then we avoid inlining that function.
            if self.tcx.mir_callgraph_reachable((*callee, caller_def_id.expect_local())) {
                return Err("caller might be reachable from callee (query cycle avoidance)");
            }

            Ok(())
        } else {
            // This cannot result in an immediate cycle since the callee MIR is from another crate
            // and is already optimized. Any subsequent inlining may cause cycles, but we do
            // not need to catch this here, we can wait until the inliner decides to continue
            // inlining a second time.
            trace!("functions from other crates always have MIR");
            Ok(())
        }
    }

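    /// Resolves the terminator of `bb` to a concrete callee, returning a `CallSite` when it is a
    /// direct call to a resolvable instance that has not already been inlined on the current path.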
    fn resolve_callsite(
        &self,
        caller_body: &Body<'tcx>,
        bb: BasicBlock,
        bb_data: &BasicBlockData<'tcx>,
    ) -> Option<CallSite<'tcx>> {
        // Only consider direct calls to functions
        let terminator = bb_data.terminator();
        if let TerminatorKind::Call { ref func, target, fn_span, .. } = terminator.kind {
            let func_ty = func.ty(caller_body, self.tcx);
            if let ty::FnDef(def_id, substs) = *func_ty.kind() {
                // To resolve an instance its substs have to be fully normalized.
                let substs = self.tcx.try_normalize_erasing_regions(self.param_env, substs).ok()?;
                let callee =
                    Instance::resolve(self.tcx, self.param_env, def_id, substs).ok().flatten()?;

                if let InstanceDef::Virtual(..) | InstanceDef::Intrinsic(_) = callee.def {
                    return None;
                }

                if self.history.contains(&callee.def_id()) {
                    return None;
                }

                let fn_sig = self.tcx.fn_sig(def_id).subst(self.tcx, substs);
                let source_info = SourceInfo { span: fn_span, ..terminator.source_info };

                return Some(CallSite { callee, fn_sig, block: bb, target, source_info });
            }
        }

        None
    }

    /// Returns an error if inlining is not possible based on codegen attributes alone. A success
    /// indicates that the inlining decision should be based on other criteria.
    fn check_codegen_attributes(
        &self,
        callsite: &CallSite<'tcx>,
        callee_attrs: &CodegenFnAttrs,
    ) -> Result<(), &'static str> {
        if let InlineAttr::Never = callee_attrs.inline {
            return Err("never inline hint");
        }

        // Only inline local functions if they would be eligible for cross-crate
        // inlining. This is to ensure that the final crate doesn't have MIR that
        // references unexported symbols.
        if callsite.callee.def_id().is_local() {
            let is_generic = callsite.callee.substs.non_erasable_generics().next().is_some();
            if !is_generic && !callee_attrs.requests_inline() {
                return Err("not exported");
            }
        }

        if callsite.fn_sig.c_variadic() {
            return Err("C variadic");
        }

        if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
            return Err("cold");
        }

        if callee_attrs.no_sanitize != self.codegen_fn_attrs.no_sanitize {
            return Err("incompatible sanitizer set");
        }

        // Two functions are compatible if the callee has no attribute (meaning
        // that it's codegen agnostic), or sets an attribute that is identical
        // to this function's attribute.
        if callee_attrs.instruction_set.is_some()
            && callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
        {
            return Err("incompatible instruction set");
        }

        for feature in &callee_attrs.target_features {
            if !self.codegen_fn_attrs.target_features.contains(feature) {
                return Err("incompatible target feature");
            }
        }

        Ok(())
    }

    /// Returns inlining decision that is based on the examination of callee MIR body.
    /// Assumes that codegen attributes have been checked for compatibility already.
    #[instrument(level = "debug", skip(self, callee_body))]
    fn check_mir_body(
        &self,
        callsite: &CallSite<'tcx>,
        callee_body: &Body<'tcx>,
        callee_attrs: &CodegenFnAttrs,
    ) -> Result<(), &'static str> {
        let tcx = self.tcx;

        let mut threshold = if callee_attrs.requests_inline() {
            self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
        } else {
            self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50)
        };

        // Give a bonus to functions with a small number of blocks; we normally have
        // two or three blocks for even very small functions.
        if callee_body.basic_blocks.len() <= 3 {
            threshold += threshold / 4;
        }
        debug!(" final inline threshold = {}", threshold);

        // FIXME: Give a bonus to functions with only a single caller

        let mut checker = CostChecker {
            tcx: self.tcx,
            param_env: self.param_env,
            instance: callsite.callee,
            callee_body,
            cost: 0,
            validation: Ok(()),
        };

        for var_debug_info in callee_body.var_debug_info.iter() {
            checker.visit_var_debug_info(var_debug_info);
        }

        // Traverse the MIR manually so we can account for the effects of inlining on the CFG.
        let mut work_list = vec![START_BLOCK];
        let mut visited = BitSet::new_empty(callee_body.basic_blocks.len());
        while let Some(bb) = work_list.pop() {
            if !visited.insert(bb.index()) {
                continue;
            }

            let blk = &callee_body.basic_blocks[bb];
            checker.visit_basic_block_data(bb, blk);

            let term = blk.terminator();
            if let TerminatorKind::Drop { ref place, target, unwind, replace: _ } = term.kind {
                work_list.push(target);

                // If the place doesn't actually need dropping, treat it like a regular goto.
                let ty = callsite
                    .callee
                    .subst_mir(self.tcx, ty::EarlyBinder::bind(&place.ty(callee_body, tcx).ty));
                if ty.needs_drop(tcx, self.param_env) && let UnwindAction::Cleanup(unwind) = unwind {
                    work_list.push(unwind);
                }
            } else if callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
                && matches!(term.kind, TerminatorKind::InlineAsm { .. })
            {
                // During the attribute checking stage we allow a callee with no
                // instruction_set assigned to count as compatible with a function that does
                // assign one. However, during this stage we require an exact match when any
                // inline-asm is detected. LLVM will still possibly do an inline later on
                // if the no-attribute function ends up with the same instruction set anyway.
                return Err("Cannot move inline-asm across instruction sets");
            } else {
                work_list.extend(term.successors())
            }
        }

        // Abort if type validation found anything fishy.
        checker.validation?;

        // N.B. We still apply our cost threshold to #[inline(always)] functions.
        // That attribute is often applied to very large functions that exceed LLVM's (very
        // generous) inlining threshold. Such functions are very poor MIR inlining candidates.
        // Always inlining #[inline(always)] functions in MIR, on net, slows down the compiler.
        let cost = checker.cost;
        if cost <= threshold {
            debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
            Ok(())
        } else {
            debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);
            Err("cost above threshold")
        }
    }

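    /// Replaces the call terminator at the callsite with a `Goto` into a copy of the callee body
    /// that is spliced into the caller.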
    fn inline_call(
        &self,
        caller_body: &mut Body<'tcx>,
        callsite: &CallSite<'tcx>,
        mut callee_body: Body<'tcx>,
    ) {
        let terminator = caller_body[callsite.block].terminator.take().unwrap();
        match terminator.kind {
            TerminatorKind::Call { args, destination, unwind, .. } => {
                // If the call is something like `a[*i] = f(i)`, where
                // `i : &mut usize`, then just duplicating the `a[*i]`
                // Place could result in two different locations if `f`
                // writes to `i`. To prevent this we need to create a temporary
                // borrow of the place and pass the destination as `*temp` instead.
                fn dest_needs_borrow(place: Place<'_>) -> bool {
                    for elem in place.projection.iter() {
                        match elem {
                            ProjectionElem::Deref | ProjectionElem::Index(_) => return true,
                            _ => {}
                        }
                    }

                    false
                }

                let dest = if dest_needs_borrow(destination) {
                    trace!("creating temp for return destination");
                    let dest = Rvalue::Ref(
                        self.tcx.lifetimes.re_erased,
                        BorrowKind::Mut { kind: MutBorrowKind::Default },
                        destination,
                    );
                    let dest_ty = dest.ty(caller_body, self.tcx);
                    let temp = Place::from(self.new_call_temp(caller_body, &callsite, dest_ty));
                    caller_body[callsite.block].statements.push(Statement {
                        source_info: callsite.source_info,
                        kind: StatementKind::Assign(Box::new((temp, dest))),
                    });
                    self.tcx.mk_place_deref(temp)
                } else {
                    destination
                };

                // Always create a local to hold the destination, as `RETURN_PLACE` may appear
                // where a full `Place` is not allowed.
                let (remap_destination, destination_local) = if let Some(d) = dest.as_local() {
                    (false, d)
                } else {
                    (
                        true,
                        self.new_call_temp(
                            caller_body,
                            &callsite,
                            destination.ty(caller_body, self.tcx).ty,
                        ),
                    )
                };

                // Copy the arguments if needed.
                let args: Vec<_> = self.make_call_args(args, &callsite, caller_body, &callee_body);

                let mut integrator = Integrator {
                    args: &args,
                    new_locals: Local::new(caller_body.local_decls.len())..,
                    new_scopes: SourceScope::new(caller_body.source_scopes.len())..,
                    new_blocks: BasicBlock::new(caller_body.basic_blocks.len())..,
                    destination: destination_local,
                    callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(),
                    callsite,
                    cleanup_block: unwind,
                    in_cleanup_block: false,
                    tcx: self.tcx,
                    always_live_locals: BitSet::new_filled(callee_body.local_decls.len()),
                };

                // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones
                // (or existing ones, in a few special cases) in the caller.
                integrator.visit_body(&mut callee_body);

                // If there are any locals without storage markers, give them storage only for the
                // duration of the call.
                for local in callee_body.vars_and_temps_iter() {
                    if !callee_body.local_decls[local].internal
                        && integrator.always_live_locals.contains(local)
                    {
                        let new_local = integrator.map_local(local);
                        caller_body[callsite.block].statements.push(Statement {
                            source_info: callsite.source_info,
                            kind: StatementKind::StorageLive(new_local),
                        });
                    }
                }
                if let Some(block) = callsite.target {
                    // To avoid repeated O(n) insert, push any new statements to the end and rotate
                    // the slice once.
                    let mut n = 0;
                    if remap_destination {
                        caller_body[block].statements.push(Statement {
                            source_info: callsite.source_info,
                            kind: StatementKind::Assign(Box::new((
                                dest,
                                Rvalue::Use(Operand::Move(destination_local.into())),
                            ))),
                        });
                        n += 1;
                    }
                    for local in callee_body.vars_and_temps_iter().rev() {
                        if !callee_body.local_decls[local].internal
                            && integrator.always_live_locals.contains(local)
                        {
                            let new_local = integrator.map_local(local);
                            caller_body[block].statements.push(Statement {
                                source_info: callsite.source_info,
                                kind: StatementKind::StorageDead(new_local),
                            });
                            n += 1;
                        }
                    }
                    caller_body[block].statements.rotate_right(n);
                }

                // Insert all of the (mapped) parts of the callee body into the caller.
                caller_body.local_decls.extend(callee_body.drain_vars_and_temps());
                caller_body.source_scopes.extend(&mut callee_body.source_scopes.drain(..));
                caller_body.var_debug_info.append(&mut callee_body.var_debug_info);
                caller_body.basic_blocks_mut().extend(callee_body.basic_blocks_mut().drain(..));

                caller_body[callsite.block].terminator = Some(Terminator {
                    source_info: callsite.source_info,
                    kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) },
                });

                // Copy only unevaluated constants from the callee_body into the caller_body.
                // Although we are only pushing `ConstKind::Unevaluated` consts to
                // `required_consts`, here we may not only have `ConstKind::Unevaluated`
                // because we are calling `subst_and_normalize_erasing_regions`.
                caller_body.required_consts.extend(
                    callee_body.required_consts.iter().copied().filter(|&ct| match ct.literal {
                        ConstantKind::Ty(_) => {
                            bug!("should never encounter ty::UnevaluatedConst in `required_consts`")
                        }
                        ConstantKind::Val(..) | ConstantKind::Unevaluated(..) => true,
                    }),
                );
            }
            kind => bug!("unexpected terminator kind {:?}", kind),
        }
    }

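    /// Produces one caller local per callee argument, spilling operands into temporaries as
    /// needed and un-tupling the tuple argument of "rust-call" (closure) calls.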
    fn make_call_args(
        &self,
        args: Vec<Operand<'tcx>>,
        callsite: &CallSite<'tcx>,
        caller_body: &mut Body<'tcx>,
        callee_body: &Body<'tcx>,
    ) -> Vec<Local> {
        let tcx = self.tcx;

        // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
        // The caller provides the arguments wrapped up in a tuple:
        //
        //     tuple_tmp = (a, b, c)
        //     Fn::call(closure_ref, tuple_tmp)
        //
        // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
        // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
        // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
        // a vector like
        //
        //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
        //
        // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
        // if we "spill" that into *another* temporary, so that we can map the argument
        // variable in the callee MIR directly to an argument variable on our side.
        // So we introduce temporaries like:
        //
        //     tmp0 = tuple_tmp.0
        //     tmp1 = tuple_tmp.1
        //     tmp2 = tuple_tmp.2
        //
        // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
        if callsite.fn_sig.abi() == Abi::RustCall && callee_body.spread_arg.is_none() {
            let mut args = args.into_iter();
            let self_ = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
            let tuple = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
            assert!(args.next().is_none());

            let tuple = Place::from(tuple);
            let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else {
                bug!("Closure arguments are not passed as a tuple");
            };

            // The `closure_ref` in our example above.
            let closure_ref_arg = iter::once(self_);

            // The `tmp0`, `tmp1`, and `tmp2` in our example above.
            let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| {
                // This is e.g., `tuple_tmp.0` in our example above.
                let tuple_field = Operand::Move(tcx.mk_place_field(tuple, FieldIdx::new(i), ty));

                // Spill to a local to make e.g., `tmp0`.
                self.create_temp_if_necessary(tuple_field, callsite, caller_body)
            });

            closure_ref_arg.chain(tuple_tmp_args).collect()
        } else {
            args.into_iter()
                .map(|a| self.create_temp_if_necessary(a, callsite, caller_body))
                .collect()
        }
    }

    /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
    /// temporary `T` and an instruction `T = arg`, and returns `T`.
    fn create_temp_if_necessary(
        &self,
        arg: Operand<'tcx>,
        callsite: &CallSite<'tcx>,
        caller_body: &mut Body<'tcx>,
    ) -> Local {
        // Reuse the operand if it is a moved temporary.
        if let Operand::Move(place) = &arg
            && let Some(local) = place.as_local()
            && caller_body.local_kind(local) == LocalKind::Temp
        {
            return local;
        }

        // Otherwise, create a temporary for the argument.
        trace!("creating temp for argument {:?}", arg);
        let arg_ty = arg.ty(caller_body, self.tcx);
        let local = self.new_call_temp(caller_body, callsite, arg_ty);
        caller_body[callsite.block].statements.push(Statement {
            source_info: callsite.source_info,
            kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))),
        });
        local
    }

    /// Introduces a new temporary into the caller body that is live for the duration of the call.
    fn new_call_temp(
        &self,
        caller_body: &mut Body<'tcx>,
        callsite: &CallSite<'tcx>,
        ty: Ty<'tcx>,
    ) -> Local {
        let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span));

        caller_body[callsite.block].statements.push(Statement {
            source_info: callsite.source_info,
            kind: StatementKind::StorageLive(local),
        });

        if let Some(block) = callsite.target {
            caller_body[block].statements.insert(
                0,
                Statement {
                    source_info: callsite.source_info,
                    kind: StatementKind::StorageDead(local),
                },
            );
        }

        local
    }
}

/// Verify that the callee body is compatible with the caller.
///
/// This visitor mostly computes the inlining cost,
/// but also needs to verify that types match because of normalization failure.
struct CostChecker<'b, 'tcx> {
    tcx: TyCtxt<'tcx>,
    param_env: ParamEnv<'tcx>,
    cost: usize,
    callee_body: &'b Body<'tcx>,
    instance: ty::Instance<'tcx>,
    validation: Result<(), &'static str>,
}

impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
    fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
        // Don't count StorageLive/StorageDead in the inlining cost.
        match statement.kind {
            StatementKind::StorageLive(_)
            | StatementKind::StorageDead(_)
            | StatementKind::Deinit(_)
            | StatementKind::Nop => {}
            _ => self.cost += INSTR_COST,
        }

        self.super_statement(statement, location);
    }

    fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, location: Location) {
        let tcx = self.tcx;
        match terminator.kind {
            TerminatorKind::Drop { ref place, unwind, .. } => {
                // If the place doesn't actually need dropping, treat it like a regular goto.
                let ty = self
                    .instance
                    .subst_mir(tcx, ty::EarlyBinder::bind(&place.ty(self.callee_body, tcx).ty));
                if ty.needs_drop(tcx, self.param_env) {
                    self.cost += CALL_PENALTY;
                    if let UnwindAction::Cleanup(_) = unwind {
                        self.cost += LANDINGPAD_PENALTY;
                    }
                } else {
                    self.cost += INSTR_COST;
                }
            }
            TerminatorKind::Call { func: Operand::Constant(ref f), unwind, .. } => {
                let fn_ty = self.instance.subst_mir(tcx, ty::EarlyBinder::bind(&f.literal.ty()));
                self.cost += if let ty::FnDef(def_id, _) = *fn_ty.kind() && tcx.is_intrinsic(def_id) {
                    // Don't give intrinsics the extra penalty for calls
                    INSTR_COST
                } else {
                    CALL_PENALTY
                };
                if let UnwindAction::Cleanup(_) = unwind {
                    self.cost += LANDINGPAD_PENALTY;
                }
            }
            TerminatorKind::Assert { unwind, .. } => {
                self.cost += CALL_PENALTY;
                if let UnwindAction::Cleanup(_) = unwind {
                    self.cost += LANDINGPAD_PENALTY;
                }
            }
            TerminatorKind::Resume => self.cost += RESUME_PENALTY,
            TerminatorKind::InlineAsm { unwind, .. } => {
                self.cost += INSTR_COST;
                if let UnwindAction::Cleanup(_) = unwind {
                    self.cost += LANDINGPAD_PENALTY;
                }
            }
            _ => self.cost += INSTR_COST,
        }

        self.super_terminator(terminator, location);
    }

    /// This method duplicates code from MIR validation in an attempt to detect type mismatches due
    /// to normalization failure.
    fn visit_projection_elem(
        &mut self,
        place_ref: PlaceRef<'tcx>,
        elem: PlaceElem<'tcx>,
        context: PlaceContext,
        location: Location,
    ) {
        if let ProjectionElem::Field(f, ty) = elem {
            let parent_ty = place_ref.ty(&self.callee_body.local_decls, self.tcx);
            let check_equal = |this: &mut Self, f_ty| {
                // Fast path if there is nothing to substitute.
                if ty == f_ty {
                    return;
                }
                let ty = this.instance.subst_mir(this.tcx, ty::EarlyBinder::bind(&ty));
                let f_ty = this.instance.subst_mir(this.tcx, ty::EarlyBinder::bind(&f_ty));
                if ty == f_ty {
                    return;
                }
                if !util::is_subtype(this.tcx, this.param_env, ty, f_ty) {
                    trace!(?ty, ?f_ty);
                    this.validation = Err("failed to normalize projection type");
                    return;
                }
            };

            let kind = match parent_ty.ty.kind() {
                &ty::Alias(ty::Opaque, ty::AliasTy { def_id, substs, .. }) => {
                    self.tcx.type_of(def_id).subst(self.tcx, substs).kind()
                }
                kind => kind,
            };

            match kind {
                ty::Tuple(fields) => {
                    let Some(f_ty) = fields.get(f.as_usize()) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, *f_ty);
                }
                ty::Adt(adt_def, substs) => {
                    let var = parent_ty.variant_index.unwrap_or(FIRST_VARIANT);
                    let Some(field) = adt_def.variant(var).fields.get(f) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, field.ty(self.tcx, substs));
                }
                ty::Closure(_, substs) => {
                    let substs = substs.as_closure();
                    let Some(f_ty) = substs.upvar_tys().nth(f.as_usize()) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, f_ty);
                }
                &ty::Generator(def_id, substs, _) => {
                    let f_ty = if let Some(var) = parent_ty.variant_index {
                        let gen_body = if def_id == self.callee_body.source.def_id() {
                            self.callee_body
                        } else {
                            self.tcx.optimized_mir(def_id)
                        };

                        let Some(layout) = gen_body.generator_layout() else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        let Some(&local) = layout.variant_fields[var].get(f) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        let Some(f_ty) = layout.field_tys.get(local) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        f_ty.ty
                    } else {
                        let Some(f_ty) = substs.as_generator().prefix_tys().nth(f.index()) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        f_ty
                    };

                    check_equal(self, f_ty);
                }
                _ => self.validation = Err("malformed MIR"),
            }
        }

        self.super_projection_elem(place_ref, elem, context, location);
    }
}

/**
 * Integrator.
 *
 * Integrates blocks from the callee function into the calling function.
 * Updates block indices, references to locals and other control flow
 * stuff.
 */
struct Integrator<'a, 'tcx> {
    args: &'a [Local],
    new_locals: RangeFrom<Local>,
    new_scopes: RangeFrom<SourceScope>,
    new_blocks: RangeFrom<BasicBlock>,
    destination: Local,
    callsite_scope: SourceScopeData<'tcx>,
    callsite: &'a CallSite<'tcx>,
    cleanup_block: UnwindAction,
    in_cleanup_block: bool,
    tcx: TyCtxt<'tcx>,
    always_live_locals: BitSet<Local>,
}

impl Integrator<'_, '_> {
    fn map_local(&self, local: Local) -> Local {
        let new = if local == RETURN_PLACE {
            self.destination
        } else {
            let idx = local.index() - 1;
            if idx < self.args.len() {
                self.args[idx]
            } else {
                Local::new(self.new_locals.start.index() + (idx - self.args.len()))
            }
        };
        trace!("mapping local `{:?}` to `{:?}`", local, new);
        new
    }

    fn map_scope(&self, scope: SourceScope) -> SourceScope {
        let new = SourceScope::new(self.new_scopes.start.index() + scope.index());
        trace!("mapping scope `{:?}` to `{:?}`", scope, new);
        new
    }

    fn map_block(&self, block: BasicBlock) -> BasicBlock {
        let new = BasicBlock::new(self.new_blocks.start.index() + block.index());
        trace!("mapping block `{:?}` to `{:?}`", block, new);
        new
    }

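    /// Maps an unwind action from the callee into the caller, routing `UnwindAction::Continue`
    /// to the unwind action of the original call.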
    fn map_unwind(&self, unwind: UnwindAction) -> UnwindAction {
        if self.in_cleanup_block {
            match unwind {
                UnwindAction::Cleanup(_) | UnwindAction::Continue => {
                    bug!("cleanup on cleanup block");
                }
                UnwindAction::Unreachable | UnwindAction::Terminate => return unwind,
            }
        }

        match unwind {
            UnwindAction::Unreachable | UnwindAction::Terminate => unwind,
            UnwindAction::Cleanup(target) => UnwindAction::Cleanup(self.map_block(target)),
            // Add an unwind edge to the original call's cleanup block
            UnwindAction::Continue => self.cleanup_block,
        }
    }
}

impl<'tcx> MutVisitor<'tcx> for Integrator<'_, 'tcx> {
    fn tcx(&self) -> TyCtxt<'tcx> {
        self.tcx
    }

    fn visit_local(&mut self, local: &mut Local, _ctxt: PlaceContext, _location: Location) {
        *local = self.map_local(*local);
    }

    fn visit_source_scope_data(&mut self, scope_data: &mut SourceScopeData<'tcx>) {
        self.super_source_scope_data(scope_data);
        if scope_data.parent_scope.is_none() {
            // Attach the outermost callee scope as a child of the callsite
            // scope, via the `parent_scope` and `inlined_parent_scope` chains.
            scope_data.parent_scope = Some(self.callsite.source_info.scope);
            assert_eq!(scope_data.inlined_parent_scope, None);
            scope_data.inlined_parent_scope = if self.callsite_scope.inlined.is_some() {
                Some(self.callsite.source_info.scope)
            } else {
                self.callsite_scope.inlined_parent_scope
            };

            // Mark the outermost callee scope as an inlined one.
            assert_eq!(scope_data.inlined, None);
            scope_data.inlined = Some((self.callsite.callee, self.callsite.source_info.span));
        } else if scope_data.inlined_parent_scope.is_none() {
            // Make it easy to find the scope with `inlined` set above.
            scope_data.inlined_parent_scope = Some(self.map_scope(OUTERMOST_SOURCE_SCOPE));
        }
    }

    fn visit_source_scope(&mut self, scope: &mut SourceScope) {
        *scope = self.map_scope(*scope);
    }

    fn visit_basic_block_data(&mut self, block: BasicBlock, data: &mut BasicBlockData<'tcx>) {
        self.in_cleanup_block = data.is_cleanup;
        self.super_basic_block_data(block, data);
        self.in_cleanup_block = false;
    }

    fn visit_retag(&mut self, kind: &mut RetagKind, place: &mut Place<'tcx>, loc: Location) {
        self.super_retag(kind, place, loc);

        // We have to patch all inlined retags to be aware that they are no longer
        // happening on function entry.
        if *kind == RetagKind::FnEntry {
            *kind = RetagKind::Default;
        }
    }

    fn visit_statement(&mut self, statement: &mut Statement<'tcx>, location: Location) {
        if let StatementKind::StorageLive(local) | StatementKind::StorageDead(local) =
            statement.kind
        {
            self.always_live_locals.remove(local);
        }
        self.super_statement(statement, location);
    }

    fn visit_terminator(&mut self, terminator: &mut Terminator<'tcx>, loc: Location) {
        // Don't try to modify the implicit `_0` access on return (`return` terminators are
        // replaced down below anyways).
        if !matches!(terminator.kind, TerminatorKind::Return) {
            self.super_terminator(terminator, loc);
        }

        match terminator.kind {
            TerminatorKind::GeneratorDrop | TerminatorKind::Yield { .. } => bug!(),
            TerminatorKind::Goto { ref mut target } => {
                *target = self.map_block(*target);
            }
            TerminatorKind::SwitchInt { ref mut targets, .. } => {
                for tgt in targets.all_targets_mut() {
                    *tgt = self.map_block(*tgt);
                }
            }
            TerminatorKind::Drop { ref mut target, ref mut unwind, .. } => {
                *target = self.map_block(*target);
                *unwind = self.map_unwind(*unwind);
            }
            TerminatorKind::Call { ref mut target, ref mut unwind, .. } => {
                if let Some(ref mut tgt) = *target {
                    *tgt = self.map_block(*tgt);
                }
                *unwind = self.map_unwind(*unwind);
            }
            TerminatorKind::Assert { ref mut target, ref mut unwind, .. } => {
                *target = self.map_block(*target);
                *unwind = self.map_unwind(*unwind);
            }
            TerminatorKind::Return => {
                terminator.kind = if let Some(tgt) = self.callsite.target {
                    TerminatorKind::Goto { target: tgt }
                } else {
                    TerminatorKind::Unreachable
                }
            }
            TerminatorKind::Resume => {
                terminator.kind = match self.cleanup_block {
                    UnwindAction::Cleanup(tgt) => TerminatorKind::Goto { target: tgt },
                    UnwindAction::Continue => TerminatorKind::Resume,
                    UnwindAction::Unreachable => TerminatorKind::Unreachable,
                    UnwindAction::Terminate => TerminatorKind::Terminate,
                };
            }
            TerminatorKind::Terminate => {}
            TerminatorKind::Unreachable => {}
            TerminatorKind::FalseEdge { ref mut real_target, ref mut imaginary_target } => {
                *real_target = self.map_block(*real_target);
                *imaginary_target = self.map_block(*imaginary_target);
            }
            TerminatorKind::FalseUnwind { real_target: _, unwind: _ } =>
            // see the ordering of passes in the optimized_mir query.
            {
                bug!("False unwinds should have been removed before inlining")
            }
            TerminatorKind::InlineAsm { ref mut destination, ref mut unwind, .. } => {
                if let Some(ref mut tgt) = *destination {
                    *tgt = self.map_block(*tgt);
                }
                *unwind = self.map_unwind(*unwind);
            }
        }
    }
}

#[instrument(skip(tcx), level = "debug")]
fn try_instance_mir<'tcx>(
    tcx: TyCtxt<'tcx>,
    instance: InstanceDef<'tcx>,
) -> Result<&'tcx Body<'tcx>, &'static str> {
    match instance {
        ty::InstanceDef::DropGlue(_, Some(ty)) => match ty.kind() {
            ty::Adt(def, substs) => {
                let fields = def.all_fields();
                for field in fields {
                    let field_ty = field.ty(tcx, substs);
                    if field_ty.has_param() && field_ty.has_projections() {
                        return Err("cannot build drop shim for polymorphic type");
                    }
                }

                Ok(tcx.instance_mir(instance))
            }
            _ => Ok(tcx.instance_mir(instance)),
        },
        _ => Ok(tcx.instance_mir(instance)),
    }
}