// (Removed web-viewer navigation chrome — "Home / Line# / Scopes / Navigate /
// Raw / Download" — which was scraped into this file and is not part of the
// source.)
1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 #![allow(unstable_name_collisions)]
5 
6 use crate::api::{GetDebugFlags, DEBUG};
7 use crate::bitset::BitSet;
8 use crate::ir::*;
9 use crate::liveness::{
10     BlockLiveness, LiveSet, Liveness, NextUseBlockLiveness, NextUseLiveness,
11 };
12 
13 use std::cell::RefCell;
14 use std::cmp::{max, Ordering, Reverse};
15 use std::collections::{BinaryHeap, HashMap, HashSet};
16 
17 struct PhiDstMap {
18     ssa_phi: HashMap<SSAValue, u32>,
19 }
20 
21 impl PhiDstMap {
new() -> PhiDstMap22     fn new() -> PhiDstMap {
23         PhiDstMap {
24             ssa_phi: HashMap::new(),
25         }
26     }
27 
add_phi_dst(&mut self, phi_idx: u32, dst: Dst)28     fn add_phi_dst(&mut self, phi_idx: u32, dst: Dst) {
29         let vec = dst.as_ssa().expect("Not an SSA destination");
30         debug_assert!(vec.comps() == 1);
31         self.ssa_phi.insert(vec[0], phi_idx);
32     }
33 
from_block(block: &BasicBlock) -> PhiDstMap34     pub fn from_block(block: &BasicBlock) -> PhiDstMap {
35         let mut map = PhiDstMap::new();
36         if let Some(phi) = block.phi_dsts() {
37             for (idx, dst) in phi.dsts.iter() {
38                 map.add_phi_dst(*idx, *dst);
39             }
40         }
41         map
42     }
43 
get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32>44     fn get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32> {
45         self.ssa_phi.get(ssa)
46     }
47 }
48 
49 struct PhiSrcMap {
50     phi_src: HashMap<u32, SSAValue>,
51 }
52 
53 impl PhiSrcMap {
new() -> PhiSrcMap54     fn new() -> PhiSrcMap {
55         PhiSrcMap {
56             phi_src: HashMap::new(),
57         }
58     }
59 
add_phi_src(&mut self, phi_idx: u32, src: Src)60     fn add_phi_src(&mut self, phi_idx: u32, src: Src) {
61         debug_assert!(src.src_mod.is_none());
62         let vec = src.src_ref.as_ssa().expect("Not an SSA source");
63         debug_assert!(vec.comps() == 1);
64         self.phi_src.insert(phi_idx, vec[0]);
65     }
66 
from_block(block: &BasicBlock) -> PhiSrcMap67     pub fn from_block(block: &BasicBlock) -> PhiSrcMap {
68         let mut map = PhiSrcMap::new();
69         if let Some(phi) = block.phi_srcs() {
70             for (idx, src) in phi.srcs.iter() {
71                 map.add_phi_src(*idx, *src);
72             }
73         }
74         map
75     }
76 
get_src_ssa(&self, phi_idx: &u32) -> &SSAValue77     pub fn get_src_ssa(&self, phi_idx: &u32) -> &SSAValue {
78         self.phi_src.get(phi_idx).expect("Phi source missing")
79     }
80 }
81 
// Abstraction over how values of a particular register file are spilled.
//
// spill_file() names the register file spilled values live in, spill()
// builds the instruction which copies a value into spill space, and fill()
// builds the instruction which reloads a spilled value.
trait Spill {
    // Returns the register file that values from @file get spilled to.
    fn spill_file(&self, file: RegFile) -> RegFile;
    // Returns an instruction which copies @src into the spill value @dst.
    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr>;
    // Returns an instruction which reloads the spill value @src into @dst.
    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>;
}
87 
88 struct SpillPred {}
89 
90 impl SpillPred {
new() -> Self91     fn new() -> Self {
92         Self {}
93     }
94 }
95 
96 impl Spill for SpillPred {
spill_file(&self, file: RegFile) -> RegFile97     fn spill_file(&self, file: RegFile) -> RegFile {
98         match file {
99             RegFile::Pred => RegFile::GPR,
100             RegFile::UPred => RegFile::UGPR,
101             _ => panic!("Unsupported register file"),
102         }
103     }
104 
spill(&self, dst: SSAValue, src: Src) -> Box<Instr>105     fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
106         assert!(dst.file() == RegFile::GPR);
107         if let Some(b) = src.as_bool() {
108             let u32_src = if b {
109                 Src::new_imm_u32(!0)
110             } else {
111                 Src::new_zero()
112             };
113             Instr::new_boxed(OpCopy {
114                 dst: dst.into(),
115                 src: u32_src,
116             })
117         } else {
118             Instr::new_boxed(OpSel {
119                 dst: dst.into(),
120                 cond: src.bnot(),
121                 srcs: [Src::new_zero(), Src::new_imm_u32(!0)],
122             })
123         }
124     }
125 
fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>126     fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
127         assert!(src.file() == RegFile::GPR);
128         Instr::new_boxed(OpISetP {
129             dst: dst,
130             set_op: PredSetOp::And,
131             cmp_op: IntCmpOp::Ne,
132             cmp_type: IntCmpType::U32,
133             ex: false,
134             srcs: [src.into(), Src::new_zero()],
135             accum: true.into(),
136             low_cmp: true.into(),
137         })
138     }
139 }
140 
141 struct SpillBar {}
142 
143 impl SpillBar {
new() -> Self144     fn new() -> Self {
145         Self {}
146     }
147 }
148 
149 impl Spill for SpillBar {
spill_file(&self, file: RegFile) -> RegFile150     fn spill_file(&self, file: RegFile) -> RegFile {
151         assert!(file == RegFile::Bar);
152         RegFile::GPR
153     }
154 
spill(&self, dst: SSAValue, src: Src) -> Box<Instr>155     fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
156         assert!(dst.file() == RegFile::GPR);
157         Instr::new_boxed(OpBMov {
158             dst: dst.into(),
159             src: src,
160             clear: false,
161         })
162     }
163 
fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>164     fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
165         assert!(src.file() == RegFile::GPR);
166         Instr::new_boxed(OpBMov {
167             dst: dst,
168             src: src.into(),
169             clear: false,
170         })
171     }
172 }
173 
174 struct SpillGPR {}
175 
176 impl SpillGPR {
new() -> Self177     fn new() -> Self {
178         Self {}
179     }
180 }
181 
182 impl Spill for SpillGPR {
spill_file(&self, file: RegFile) -> RegFile183     fn spill_file(&self, file: RegFile) -> RegFile {
184         assert!(file == RegFile::GPR);
185         RegFile::Mem
186     }
187 
spill(&self, dst: SSAValue, src: Src) -> Box<Instr>188     fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
189         assert!(dst.file() == RegFile::Mem);
190         Instr::new_boxed(OpCopy {
191             dst: dst.into(),
192             src: src,
193         })
194     }
195 
fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>196     fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
197         assert!(src.file() == RegFile::Mem);
198         Instr::new_boxed(OpCopy {
199             dst: dst,
200             src: src.into(),
201         })
202     }
203 }
204 
205 #[derive(Eq, PartialEq)]
206 struct SSANextUse {
207     ssa: SSAValue,
208     next_use: usize,
209 }
210 
211 impl SSANextUse {
new(ssa: SSAValue, next_use: usize) -> SSANextUse212     fn new(ssa: SSAValue, next_use: usize) -> SSANextUse {
213         SSANextUse {
214             ssa: ssa,
215             next_use: next_use,
216         }
217     }
218 }
219 
220 impl Ord for SSANextUse {
cmp(&self, other: &Self) -> Ordering221     fn cmp(&self, other: &Self) -> Ordering {
222         self.next_use
223             .cmp(&other.next_use)
224             .then_with(|| self.ssa.idx().cmp(&other.ssa.idx()))
225     }
226 }
227 
228 impl PartialOrd for SSANextUse {
partial_cmp(&self, other: &Self) -> Option<Ordering>229     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
230         Some(self.cmp(other))
231     }
232 }
233 
234 struct SpillCache<'a, S: Spill> {
235     alloc: &'a mut SSAValueAllocator,
236     spill: S,
237     val_spill: HashMap<SSAValue, SSAValue>,
238 }
239 
240 impl<'a, S: Spill> SpillCache<'a, S> {
new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S>241     fn new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S> {
242         SpillCache {
243             alloc: alloc,
244             spill: spill,
245             val_spill: HashMap::new(),
246         }
247     }
248 
get_spill(&mut self, ssa: SSAValue) -> SSAValue249     fn get_spill(&mut self, ssa: SSAValue) -> SSAValue {
250         *self.val_spill.entry(ssa).or_insert_with(|| {
251             self.alloc.alloc(self.spill.spill_file(ssa.file()))
252         })
253     }
254 
spill_src(&mut self, ssa: SSAValue, src: Src) -> Box<Instr>255     fn spill_src(&mut self, ssa: SSAValue, src: Src) -> Box<Instr> {
256         let dst = self.get_spill(ssa);
257         self.spill.spill(dst, src)
258     }
259 
spill(&mut self, ssa: SSAValue) -> Box<Instr>260     fn spill(&mut self, ssa: SSAValue) -> Box<Instr> {
261         self.spill_src(ssa, ssa.into())
262     }
263 
fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box<Instr>264     fn fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box<Instr> {
265         let src = self.get_spill(ssa);
266         self.spill.fill(dst, src)
267     }
268 
fill(&mut self, ssa: SSAValue) -> Box<Instr>269     fn fill(&mut self, ssa: SSAValue) -> Box<Instr> {
270         self.fill_dst(ssa.into(), ssa)
271     }
272 }
273 
274 struct SpillChooser<'a> {
275     bl: &'a NextUseBlockLiveness,
276     ip: usize,
277     count: usize,
278     spills: BinaryHeap<Reverse<SSANextUse>>,
279     min_next_use: usize,
280 }
281 
282 struct SpillChoiceIter {
283     spills: BinaryHeap<Reverse<SSANextUse>>,
284 }
285 
286 impl<'a> SpillChooser<'a> {
new(bl: &'a NextUseBlockLiveness, ip: usize, count: usize) -> Self287     pub fn new(bl: &'a NextUseBlockLiveness, ip: usize, count: usize) -> Self {
288         Self {
289             bl: bl,
290             ip: ip,
291             count: count,
292             spills: BinaryHeap::new(),
293             min_next_use: ip + 1,
294         }
295     }
296 
add_candidate(&mut self, ssa: SSAValue)297     pub fn add_candidate(&mut self, ssa: SSAValue) {
298         let next_use = self.bl.next_use_after_or_at_ip(&ssa, self.ip).unwrap();
299 
300         // Ignore anything used sonner than spill options we've already
301         // rejected.
302         if next_use < self.min_next_use {
303             return;
304         }
305 
306         self.spills.push(Reverse(SSANextUse::new(ssa, next_use)));
307 
308         if self.spills.len() > self.count {
309             // Because we reversed the heap, pop actually removes the
310             // one with the lowest next_use which is what we want here.
311             let old = self.spills.pop().unwrap();
312             debug_assert!(self.spills.len() == self.count);
313             self.min_next_use = max(self.min_next_use, old.0.next_use);
314         }
315     }
316 }
317 
318 impl<'a> IntoIterator for SpillChooser<'a> {
319     type Item = SSAValue;
320     type IntoIter = SpillChoiceIter;
321 
into_iter(self) -> SpillChoiceIter322     fn into_iter(self) -> SpillChoiceIter {
323         SpillChoiceIter {
324             spills: self.spills,
325         }
326     }
327 }
328 
329 impl Iterator for SpillChoiceIter {
330     type Item = SSAValue;
331 
size_hint(&self) -> (usize, Option<usize>)332     fn size_hint(&self) -> (usize, Option<usize>) {
333         let len = self.spills.len();
334         (len, Some(len))
335     }
336 
next(&mut self) -> Option<SSAValue>337     fn next(&mut self) -> Option<SSAValue> {
338         self.spills.pop().map(|x| x.0.ssa)
339     }
340 }
341 
// Per-block W/S spilling state in the style of Braun & Hack's "Register
// Spilling and Live-Range Splitting for SSA-Form Programs".
#[derive(Clone)]
struct SSAState {
    // The set of variables which currently exist in registers
    w: LiveSet,
    // The set of variables which have already been spilled.  These don't need
    // to be spilled again.
    s: HashSet<SSAValue>,
}
350 
// Spills values of @file so that at most @limit of them are resident in
// registers at any point, using @spill to build the spill/fill instructions.
//
// Implements the Braun & Hack algorithm: each block is walked top-down while
// maintaining W (resident values) and S (already-spilled values), inserting
// spills and fills around instructions; block-boundary W/S sets are then
// reconciled by inserting spills/fills along control-flow edges.
fn spill_values<S: Spill>(
    func: &mut Function,
    file: RegFile,
    limit: u32,
    spill: S,
) {
    let files = RegFileSet::from_iter([file]);
    let live = NextUseLiveness::for_function(func, &files);
    let blocks = &mut func.blocks;

    // Record the set of SSA values used within each loop
    let mut phi_dst_maps = Vec::new();
    let mut phi_src_maps = Vec::new();
    let mut loop_uses = HashMap::new();
    for b_idx in 0..blocks.len() {
        phi_dst_maps.push(PhiDstMap::from_block(&blocks[b_idx]));
        phi_src_maps.push(PhiSrcMap::from_block(&blocks[b_idx]));

        if let Some(lh_idx) = blocks.loop_header_index(b_idx) {
            // RefCell lets us borrow two different entries at once during
            // the inner-to-outer propagation pass below.
            let uses = loop_uses
                .entry(lh_idx)
                .or_insert_with(|| RefCell::new(HashSet::new()));
            let uses = uses.get_mut();

            for instr in &blocks[b_idx].instrs {
                instr.for_each_ssa_use(|ssa| {
                    if ssa.file() == file {
                        uses.insert(*ssa);
                    }
                });
            }
        }
    }

    if !loop_uses.is_empty() {
        // The previous loop only added values to the uses set for the
        // inner-most loop.  Propagate from inner loops to outer loops.
        for b_idx in (0..blocks.len()).rev() {
            let Some(uses) = loop_uses.get(&b_idx) else {
                continue;
            };
            let uses = uses.borrow();

            let Some(dom) = blocks.dom_parent_index(b_idx) else {
                continue;
            };

            let Some(dom_lh_idx) = blocks.loop_header_index(dom) else {
                continue;
            };

            let mut parent_uses =
                loop_uses.get(&dom_lh_idx).unwrap().borrow_mut();
            for ssa in uses.iter() {
                parent_uses.insert(*ssa);
            }
        }
    }

    let mut spill = SpillCache::new(&mut func.ssa_alloc, spill);
    // Phi indices whose destination was rewritten to a spill value
    let mut spilled_phis = BitSet::new();

    // W/S state at block entry and exit, indexed by block
    let mut ssa_state_in: Vec<SSAState> = Vec::new();
    let mut ssa_state_out: Vec<SSAState> = Vec::new();

    for b_idx in 0..blocks.len() {
        let bl = live.block_live(b_idx);

        let preds = blocks.pred_indices(b_idx).to_vec();
        let w = if preds.is_empty() {
            // This is the start block so we start with nothing in
            // registers.
            LiveSet::new()
        } else if preds.len() == 1 {
            // If we only have one predecessor then it can't possibly be a
            // loop header and we can just copy the predecessor's w.
            assert!(!blocks.is_loop_header(b_idx));
            assert!(preds[0] < b_idx);
            let p_w = &ssa_state_out[preds[0]].w;
            LiveSet::from_iter(
                p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else if blocks.is_loop_header(b_idx) {
            // Loop-header heuristic: prefer to keep values which are
            // actually used inside the loop resident across it.
            let mut i_b: HashSet<SSAValue> =
                HashSet::from_iter(bl.iter_live_in().cloned());

            if let Some(phi) = blocks[b_idx].phi_dsts() {
                for (_, dst) in phi.dsts.iter() {
                    if let Dst::SSA(vec) = dst {
                        assert!(vec.comps() == 1);
                        let ssa = vec[0];
                        if ssa.file() == file {
                            i_b.insert(ssa);
                        }
                    }
                }
            }

            let lu = loop_uses.get(&b_idx).unwrap().borrow();
            let mut w = LiveSet::new();

            // Min-heap keyed on next-use: soonest-used values get priority
            let mut some = BinaryHeap::new();
            for ssa in i_b.iter() {
                if lu.contains(ssa) {
                    let next_use = bl.first_use(ssa).unwrap();
                    some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                w.insert(entry.0.ssa);
            }

            // If we still have room, consider values which aren't used
            // inside the loop.
            if w.count(file) < limit {
                for ssa in i_b.iter() {
                    debug_assert!(ssa.file() == file);
                    if !lu.contains(ssa) {
                        let next_use = bl.first_use(ssa).unwrap();
                        some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                    }
                }

                while w.count(file) < limit {
                    let Some(entry) = some.pop() else {
                        break;
                    };
                    w.insert(entry.0.ssa);
                }
            }

            w
        } else {
            // Multi-predecessor join (not a loop header)
            let phi_dst_map = &phi_dst_maps[b_idx];

            struct SSAPredInfo {
                num_preds: usize,
                next_use: usize,
            }
            // NB: shadows the function-level NextUseLiveness binding
            let mut live: HashMap<SSAValue, SSAPredInfo> = HashMap::new();

            for p_idx in &preds {
                let phi_src_map = &phi_src_maps[*p_idx];

                for mut ssa in ssa_state_out[*p_idx].w.iter().cloned() {
                    // Translate through the phi, if any, so all preds are
                    // counted against the same value.
                    if let Some(phi) = phi_dst_map.get_phi_idx(&ssa) {
                        ssa = *phi_src_map.get_src_ssa(phi);
                    }

                    if let Some(next_use) = bl.first_use(&ssa) {
                        live.entry(ssa)
                            .and_modify(|e| e.num_preds += 1)
                            .or_insert_with(|| SSAPredInfo {
                                num_preds: 1,
                                next_use: next_use,
                            });
                    }
                }
            }

            let mut w = LiveSet::new();
            let mut some = BinaryHeap::new();

            for (ssa, info) in live.drain() {
                if info.num_preds == preds.len() {
                    // This one is in all the input sets
                    w.insert(ssa);
                } else {
                    some.push(Reverse(SSANextUse::new(ssa, info.next_use)));
                }
            }
            // Fill remaining room with partially-resident values, soonest
            // next-use first.
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                let ssa = entry.0.ssa;
                assert!(ssa.file() == file);
                w.insert(ssa);
            }

            w
        };

        let s = if preds.is_empty() {
            HashSet::new()
        } else if preds.len() == 1 {
            let p_s = &ssa_state_out[preds[0]].s;
            HashSet::from_iter(
                p_s.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else {
            let mut s = HashSet::new();
            for p_idx in &preds {
                if *p_idx >= b_idx {
                    continue;
                }

                // We diverge a bit from Braun and Hack here.  They assume that
                // S is a subset of W which is clearly bogus.  Instead, we
                // take the union of all forward edge predecessor S_out and
                // intersect with live-in for the current block.
                for ssa in ssa_state_out[*p_idx].s.iter() {
                    if bl.is_live_in(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            // The loop header heuristic sometimes drops stuff from W that has
            // never been spilled so we need to make sure everything live-in
            // which isn't in W is included in the spill set so that it gets
            // properly spilled when we spill across CF edges.
            if blocks.is_loop_header(b_idx) {
                for ssa in bl.iter_live_in() {
                    if !w.contains(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            s
        };

        // Every live-in value must be accounted for: resident or spilled
        for ssa in bl.iter_live_in() {
            debug_assert!(w.contains(ssa) || s.contains(ssa));
        }

        let mut b = SSAState { w: w, s: s };

        assert!(ssa_state_in.len() == b_idx);
        ssa_state_in.push(b.clone());

        let bb = &mut blocks[b_idx];

        // Rebuild the instruction list, inserting spills/fills as we go
        let mut instrs = Vec::new();
        for (ip, mut instr) in bb.instrs.drain(..).enumerate() {
            match &mut instr.op {
                Op::PhiDsts(phi) => {
                    // For phis, anything that is not in W needs to be spilled
                    // by setting the destination to some spill value.
                    for (idx, dst) in phi.dsts.iter_mut() {
                        let vec = dst.as_ssa().unwrap();
                        debug_assert!(vec.comps() == 1);
                        let ssa = &vec[0];

                        if ssa.file() == file && !b.w.contains(ssa) {
                            spilled_phis.insert((*idx).try_into().unwrap());
                            b.s.insert(*ssa);
                            *dst = spill.get_spill(*ssa).into();
                        }
                    }
                }
                Op::PhiSrcs(_) => {
                    // We handle phi sources later.  For now, leave them be.
                }
                Op::ParCopy(pcopy) => {
                    let mut num_w_dsts = 0_u32;
                    for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                        let dst_vec = dst.as_ssa().unwrap();
                        debug_assert!(dst_vec.comps() == 1);
                        let dst_ssa = &dst_vec[0];

                        debug_assert!(src.src_mod.is_none());
                        let src_vec = src.src_ref.as_ssa().unwrap();
                        debug_assert!(src_vec.comps() == 1);
                        let src_ssa = &src_vec[0];

                        debug_assert!(dst_ssa.file() == src_ssa.file());
                        if src_ssa.file() != file {
                            continue;
                        }

                        // If it's not resident, rewrite to just move from one
                        // spill to another, assuming that copying in spill
                        // space is efficient
                        if b.w.contains(src_ssa) {
                            num_w_dsts += 1;
                        } else {
                            debug_assert!(b.s.contains(src_ssa));
                            b.s.insert(*dst_ssa);
                            *src = spill.get_spill(*src_ssa).into();
                            *dst = spill.get_spill(*dst_ssa).into();
                        }
                    }

                    // We can now assume that a source is in W if and only if
                    // the file matches.  Remove all killed sources from W.
                    for (_, src) in pcopy.dsts_srcs.iter() {
                        let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                        if !bl.is_live_after_ip(src_ssa, ip) {
                            b.w.remove(src_ssa);
                        }
                    }

                    // If more register destinations exist than remaining
                    // room, demote the furthest-next-use copies to spill
                    // space.
                    let rel_limit = limit - b.w.count(file);
                    if num_w_dsts > rel_limit {
                        let count = num_w_dsts - rel_limit;
                        let count = count.try_into().unwrap();

                        let mut spills = SpillChooser::new(bl, ip, count);
                        for (dst, _) in pcopy.dsts_srcs.iter() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            if dst_ssa.file() == file {
                                spills.add_candidate(*dst_ssa);
                            }
                        }

                        let spills: HashSet<SSAValue> =
                            HashSet::from_iter(spills);

                        for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                            if spills.contains(dst_ssa) {
                                // Spill the source first if it never was
                                if b.s.insert(*src_ssa) {
                                    if DEBUG.annotate() {
                                        instrs.push(Instr::new_boxed(
                                            OpAnnotate {
                                                annotation:
                                                    "generated by spill_values"
                                                        .into(),
                                            },
                                        ));
                                    }
                                    instrs.push(spill.spill(*src_ssa));
                                }
                                b.s.insert(*dst_ssa);
                                *src = spill.get_spill(*src_ssa).into();
                                *dst = spill.get_spill(*dst_ssa).into();
                            }
                        }
                    }

                    // Whatever still writes @file after rewriting is resident
                    for (dst, _) in pcopy.dsts_srcs.iter() {
                        let dst_ssa = &dst.as_ssa().unwrap()[0];
                        if dst_ssa.file() == file {
                            b.w.insert(*dst_ssa);
                        }
                    }
                }
                _ => {
                    // First compute fills even though those have to come
                    // after spills.
                    let mut fills = Vec::new();
                    instr.for_each_ssa_use(|ssa| {
                        if ssa.file() == file && !b.w.contains(ssa) {
                            debug_assert!(b.s.contains(ssa));
                            fills.push(spill.fill(*ssa));
                            b.w.insert(*ssa);
                        }
                    });

                    // Pressure if this instruction executed with W resident
                    let rel_pressure = bl.get_instr_pressure(ip, &instr)[file];
                    let abs_pressure =
                        b.w.count(file) + u32::from(rel_pressure);

                    if abs_pressure > limit {
                        let count = abs_pressure - limit;
                        let count = count.try_into().unwrap();

                        // Evict the values with the furthest next use
                        let mut spills = SpillChooser::new(bl, ip, count);
                        for ssa in b.w.iter() {
                            spills.add_candidate(*ssa);
                        }

                        for ssa in spills {
                            debug_assert!(ssa.file() == file);
                            b.w.remove(&ssa);
                            if DEBUG.annotate() {
                                instrs.push(Instr::new_boxed(OpAnnotate {
                                    annotation: "generated by spill_values"
                                        .into(),
                                }));
                            }
                            instrs.push(spill.spill(ssa));
                            b.s.insert(ssa);
                        }
                    }

                    if DEBUG.annotate() {
                        instrs.push(Instr::new_boxed(OpAnnotate {
                            annotation: "generated by spill_values".into(),
                        }));
                    }
                    instrs.append(&mut fills);

                    // Every use of @file must now be resident
                    instr.for_each_ssa_use(|ssa| {
                        if ssa.file() == file {
                            debug_assert!(b.w.contains(ssa));
                        }
                    });

                    b.w.insert_instr_top_down(ip, &instr, bl);
                }
            }

            instrs.push(instr);
        }
        bb.instrs = instrs;

        assert!(ssa_state_out.len() == b_idx);
        ssa_state_out.push(b);
    }

    // Now that everything is spilled, we handle phi sources and connect the
    // blocks by adding spills and fills as needed along edges.
    for p_idx in 0..blocks.len() {
        let succ = blocks.succ_indices(p_idx);
        if succ.len() != 1 {
            // We don't have any critical edges
            for s_idx in succ {
                debug_assert!(blocks.pred_indices(*s_idx).len() == 1);
            }
            continue;
        }
        let s_idx = succ[0];

        // If blocks[p_idx] is the unique predecessor of blocks[s_idx] then the
        // spill/fill sets for blocks[s_idx] are just those from blocks[p_idx],
        // filtered for liveness and there is no phi source.  There's nothing
        // for us to do here.
        if blocks.pred_indices(s_idx).len() == 1 {
            continue;
        }

        let pb = &mut blocks[p_idx];
        let p_out = &ssa_state_out[p_idx];
        let s_in = &ssa_state_in[s_idx];
        let phi_dst_map = &phi_dst_maps[s_idx];

        let mut spills = Vec::new();
        let mut fills = Vec::new();

        if let Some(phi) = pb.phi_srcs_mut() {
            for (idx, src) in phi.srcs.iter_mut() {
                debug_assert!(src.src_mod.is_none());
                let vec = src.src_ref.as_ssa().unwrap();
                debug_assert!(vec.comps() == 1);
                let ssa = &vec[0];

                if ssa.file() != file {
                    continue;
                }

                if spilled_phis.get((*idx).try_into().unwrap()) {
                    // The phi destination was spilled so its source must be
                    // provided in spill space as well.
                    if !p_out.s.contains(ssa) {
                        spills.push(*ssa);
                    }
                    *src = spill.get_spill(*ssa).into();
                } else {
                    // The phi consumes a register; fill if not resident.
                    if !p_out.w.contains(ssa) {
                        fills.push(*ssa);
                    }
                }
            }
        }

        // Anything the successor expects spilled must be spilled here
        for ssa in s_in.s.iter() {
            if p_out.w.contains(ssa) && !p_out.s.contains(ssa) {
                spills.push(*ssa);
            }
        }

        // Anything the successor expects resident (other than phi
        // destinations) must be filled here
        for ssa in s_in.w.iter() {
            if phi_dst_map.get_phi_idx(ssa).is_some() {
                continue;
            }

            if !p_out.w.contains(ssa) {
                fills.push(*ssa);
            }
        }

        if spills.is_empty() && fills.is_empty() {
            continue;
        }

        // Sort to ensure stability of the algorithm
        spills.sort_by_key(|ssa| ssa.idx());
        fills.sort_by_key(|ssa| ssa.idx());

        let mut instrs = Vec::new();
        for ssa in spills {
            instrs.push(spill.spill(ssa));
        }
        for ssa in fills {
            instrs.push(spill.fill(ssa));
        }

        // Insert spills and fills right before the phi sources and final
        // branch (if any)
        let mut ip = pb.instrs.len();
        while ip > 0 {
            let instr = &pb.instrs[ip - 1];
            if !instr.is_branch() {
                match instr.op {
                    Op::PhiSrcs(_) => (),
                    _ => break,
                }
            }
            ip -= 1;
        }
        pb.instrs.splice(ip..ip, instrs.into_iter());
    }
}
858 
859 impl Function {
860     /// Spill values from @file to fit within @limit registers
861     ///
862     /// This pass assumes that the function is already in CSSA form.  See
863     /// @to_cssa for more details.
864     ///
865     /// The algorithm implemented here is roughly based on "Register Spilling
866     /// and Live-Range Splitting for SSA-Form Programs" by Braun and Hack.  The
867     /// primary contributions of the Braun and Hack paper are the global
868     /// next-use distances which are implemented by @NextUseLiveness and a
869     /// heuristic for computing spill sets at block boundaries.  The paper
870     /// describes two sets:
871     ///
872     ///  - W, the set of variables currently resident
873     ///
874     ///  - S, the set of variables which have been spilled
875     ///
876     /// These sets are tracked as we walk instructions and [un]spill values to
877     /// satisfy the given limit.  When spills are required we spill the value
878     /// with the nighest next-use IP.  At block boundaries, Braun and Hack
879     /// describe a heuristic for determining the starting W and S sets based on
880     /// the W and S from the end of each of the forward edge predecessor blocks.
881     ///
882     /// What Braun and Hack do not describe is how to handle phis and parallel
883     /// copies.  Because we assume the function is already in CSSA form, we can
884     /// use a fairly simple algorithm.  On the first pass, we ignore phi sources
885     /// and assign phi destinations based on W at the start of the block.  If
886     /// the phi destination is in W, we leave it alone.  If it is not in W, then
887     /// we allocate a new spill value and assign it to the phi destination.  In
888     /// a second pass, we handle phi sources based on the destination.  If the
889     /// destination is in W, we leave it alone.  If the destination is spilled,
890     /// we read from the spill value corresponding to the source, spilling first
891     /// if needed.  In the second pass, we also handle spilling across blocks as
892     /// needed for values that do not pass through a phi.
893     ///
894     /// A special case is also required for parallel copies because they can
895     /// have an unbounded number of destinations.  For any source values not in
896     /// W, we allocate a spill value for the destination and copy in the spill
897     /// register file.  For any sources which are in W, we try to leave as much
898     /// in W as possible.  However, since source values may not be killed by the
899     /// copy and because one source value may be copied to arbitrarily many
900     /// destinations, that is not always possible.  Whenever we need to spill
901     /// values, we spill according to the highest next-use of the destination
902     /// and we spill the source first and then parallel copy the source into a
903     /// spilled destination value.
904     ///
905     /// This all assumes that it's better to copy in spill space than to unspill
906     /// just for the sake of a parallel copy.  While this may not be true in
907     /// general, especially not when spilling to memory, the register allocator
908     /// is good at eliding unnecessary copies.
spill_values(&mut self, file: RegFile, limit: u32)909     pub fn spill_values(&mut self, file: RegFile, limit: u32) {
910         match file {
911             RegFile::GPR => {
912                 let spill = SpillGPR::new();
913                 spill_values(self, file, limit, spill);
914             }
915             RegFile::Pred => {
916                 let spill = SpillPred::new();
917                 spill_values(self, file, limit, spill);
918             }
919             RegFile::Bar => {
920                 let spill = SpillBar::new();
921                 spill_values(self, file, limit, spill);
922             }
923             _ => panic!("Don't know how to spill {} registers", file),
924         }
925 
926         self.repair_ssa();
927         self.opt_dce();
928 
929         if DEBUG.print() {
930             eprintln!("NAK IR after spilling {}:\n{}", file, self);
931         }
932     }
933 }
934