1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7 use crate::union_find::UnionFind;
8 
9 use compiler::bitset::BitSet;
10 use std::cmp::{max, min, Ordering};
11 use std::collections::{HashMap, HashSet};
12 
13 struct KillSet {
14     set: HashSet<SSAValue>,
15     vec: Vec<SSAValue>,
16 }
17 
18 impl KillSet {
19     pub fn new() -> KillSet {
20         KillSet {
21             set: HashSet::new(),
22             vec: Vec::new(),
23         }
24     }
25 
26     pub fn len(&self) -> usize {
27         self.vec.len()
28     }
29 
30     pub fn clear(&mut self) {
31         self.set.clear();
32         self.vec.clear();
33     }
34 
35     pub fn insert(&mut self, ssa: SSAValue) {
36         if self.set.insert(ssa) {
37             self.vec.push(ssa);
38         }
39     }
40 
41     pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
42         self.vec.iter()
43     }
44 
45     pub fn is_empty(&self) -> bool {
46         self.vec.is_empty()
47     }
48 }
49 
50 // These two helpers are carefully paired for the purposes of RA.
51 // src_ssa_ref() returns whatever SSARef is present in the source, if any.
52 // src_set_reg() overwrites that SSARef with a RegRef.
53 #[inline]
54 fn src_ssa_ref(src: &Src) -> Option<&SSARef> {
55     match &src.src_ref {
56         SrcRef::SSA(ssa) => Some(ssa),
57         SrcRef::CBuf(CBufRef {
58             buf: CBuf::BindlessSSA(ssa),
59             ..
60         }) => Some(ssa),
61         _ => None,
62     }
63 }
64 
65 #[inline]
66 fn src_set_reg(src: &mut Src, reg: RegRef) {
67     match &mut src.src_ref {
68         SrcRef::SSA(_) => {
69             src.src_ref = reg.into();
70         }
71         SrcRef::CBuf(cb) => {
72             debug_assert!(matches!(&cb.buf, CBuf::BindlessSSA(_)));
73             debug_assert!(reg.file() == RegFile::UGPR && reg.comps() == 2);
74             cb.buf = CBuf::BindlessUGPR(reg);
75         }
76         _ => (),
77     }
78 }
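// A typical pairing, as used by the remapping helpers further down (sketch):
//
//     if let Some(ssa) = src_ssa_ref(src) {
//         let reg = ra.collect_vector(ssa);
//         src_set_reg(src, reg);
//     }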
79 
80 enum SSAUse {
81     FixedReg(u32),
82     Vec(SSARef),
83 }
84 
85 struct SSAUseMap {
86     ssa_map: HashMap<SSAValue, Vec<(usize, SSAUse)>>,
87 }
88 
89 impl SSAUseMap {
90     fn add_fixed_reg_use(&mut self, ip: usize, ssa: SSAValue, reg: u32) {
91         let v = self.ssa_map.entry(ssa).or_default();
92         v.push((ip, SSAUse::FixedReg(reg)));
93     }
94 
95     fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
96         if vec.comps() == 1 {
97             return;
98         }
99 
100         for ssa in vec.iter() {
101             let v = self.ssa_map.entry(*ssa).or_default();
102             v.push((ip, SSAUse::Vec(vec)));
103         }
104     }
105 
106     fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
107         if let Some(v) = self.ssa_map.get(&ssa) {
108             let p = v.partition_point(|(uip, _)| *uip <= ip);
109             if p == v.len() {
110                 None
111             } else {
112                 let (_, u) = &v[p];
113                 Some(u)
114             }
115         } else {
116             None
117         }
118     }
119 
120     pub fn add_block(&mut self, b: &BasicBlock) {
121         for (ip, instr) in b.instrs.iter().enumerate() {
122             match &instr.op {
123                 Op::RegOut(op) => {
124                     for (i, src) in op.srcs.iter().enumerate() {
125                         let out_reg = u32::try_from(i).unwrap();
126                         if let Some(ssa) = src_ssa_ref(src) {
127                             assert!(ssa.comps() == 1);
128                             self.add_fixed_reg_use(ip, ssa[0], out_reg);
129                         }
130                     }
131                 }
132                 _ => {
133                     // We don't care about predicates because they're scalar
134                     for src in instr.srcs() {
135                         if let Some(ssa) = src_ssa_ref(src) {
136                             self.add_vec_use(ip, *ssa);
137                         }
138                     }
139                 }
140             }
141         }
142     }
143 
144     pub fn for_block(b: &BasicBlock) -> SSAUseMap {
145         let mut am = SSAUseMap {
146             ssa_map: HashMap::new(),
147         };
148         am.add_block(b);
149         am
150     }
151 }
152 
153 /// Tracks the most recent register assigned to a given phi web
154 ///
155 /// During register assignment, we then try to assign this register
156 /// to the next SSAValue in the same web.
157 ///
158 /// This heuristic is inspired by the "Aggressive pre-coalescing" described in
159 /// section 4 of Colombet et al 2011.
160 ///
161 /// Q. Colombet, B. Boissinot, P. Brisk, S. Hack and F. Rastello,
162 ///     "Graph-coloring and treescan register allocation using repairing," 2011
163 ///     Proceedings of the 14th International Conference on Compilers,
164 ///     Architectures and Synthesis for Embedded Systems (CASES), Taipei,
165 ///     Taiwan, 2011, pp. 45-54, doi: 10.1145/2038698.2038708.
166 struct PhiWebs {
167     uf: UnionFind<SSAValue>,
168     assignments: HashMap<SSAValue, u32>,
169 }
170 
171 impl PhiWebs {
172     pub fn new(f: &Function) -> Self {
173         let mut uf = UnionFind::new();
174 
175         // Populate uf with phi equivalence classes
176         //
177         // Note that we intentionally don't pay attention to move instructions
178         // below - the assumption is that any move instructions at this point
179         // were inserted by cssa-conversion and will hurt the coalescing
180         for b_idx in 0..f.blocks.len() {
181             let Some(phi_dsts) = f.blocks[b_idx].phi_dsts() else {
182                 continue;
183             };
184             let dsts: HashMap<u32, &SSARef> = phi_dsts
185                 .dsts
186                 .iter()
187                 .map(|(idx, dst)| {
188                     let ssa_ref = dst.as_ssa().expect("Expected ssa form");
189                     (*idx, ssa_ref)
190                 })
191                 .collect();
192 
193             for pred_idx in f.blocks.pred_indices(b_idx) {
194                 let phi_srcs =
195                     f.blocks[*pred_idx].phi_srcs().expect("Missing phi_srcs");
196                 for (src_idx, src) in phi_srcs.srcs.iter() {
197                     let a = src.as_ssa().expect("Expected ssa form");
198                     let b = dsts[src_idx];
199 
200                     assert_eq!(a.comps(), 1);
201                     assert_eq!(b.comps(), 1);
202 
203                     uf.union(a[0], b[0]);
204                 }
205             }
206         }
207 
208         PhiWebs {
209             uf,
210             assignments: HashMap::new(),
211         }
212     }
213 
214     pub fn get(&mut self, ssa: SSAValue) -> Option<u32> {
215         let phi_web_id = self.uf.find(ssa);
216         self.assignments.get(&phi_web_id).copied()
217     }
218 
219     pub fn set(&mut self, ssa: SSAValue, reg: u32) {
220         let phi_web_id = self.uf.find(ssa);
221         self.assignments.insert(phi_web_id, reg);
222     }
223 }
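// Usage sketch: alloc_scalar() below consults the web first and falls back to
// other heuristics if the preferred register is already taken, and first_pass()
// records the final assignments so later blocks bias toward the same registers:
//
//     if let Some(reg) = phi_webs.get(ssa) {
//         if !ra.reg_is_used(reg) { /* take it */ }
//     }
//     // ... once the block has been processed ...
//     phi_webs.set(ssa, reg);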
224 
225 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
226 enum LiveRef {
227     SSA(SSAValue),
228     Phi(u32),
229 }
230 
231 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
232 struct LiveValue {
233     pub live_ref: LiveRef,
234     pub reg_ref: RegRef,
235 }
236 
237 // We need a stable ordering of live values so that RA is deterministic
238 impl Ord for LiveValue {
239     fn cmp(&self, other: &Self) -> Ordering {
240         let s_file = u8::from(self.reg_ref.file());
241         let o_file = u8::from(other.reg_ref.file());
242         match s_file.cmp(&o_file) {
243             Ordering::Equal => {
244                 let s_idx = self.reg_ref.base_idx();
245                 let o_idx = other.reg_ref.base_idx();
246                 s_idx.cmp(&o_idx)
247             }
248             ord => ord,
249         }
250     }
251 }
252 
253 impl PartialOrd for LiveValue {
254     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
255         Some(self.cmp(other))
256     }
257 }
258 
259 #[derive(Clone)]
260 struct RegAllocator {
261     file: RegFile,
262     num_regs: u32,
263     used: BitSet,
264     pinned: BitSet,
265     reg_ssa: Vec<SSAValue>,
266     ssa_reg: HashMap<SSAValue, u32>,
267 }
268 
269 impl RegAllocator {
270     pub fn new(file: RegFile, num_regs: u32) -> Self {
271         Self {
272             file: file,
273             num_regs: num_regs,
274             used: BitSet::new(),
275             pinned: BitSet::new(),
276             reg_ssa: Vec::new(),
277             ssa_reg: HashMap::new(),
278         }
279     }
280 
281     fn file(&self) -> RegFile {
282         self.file
283     }
284 
285     pub fn num_regs_used(&self) -> u32 {
286         self.ssa_reg.len().try_into().unwrap()
287     }
288 
289     pub fn reg_is_used(&self, reg: u32) -> bool {
290         self.used.get(reg.try_into().unwrap())
291     }
292 
293     pub fn reg_is_pinned(&self, reg: u32) -> bool {
294         self.pinned.get(reg.try_into().unwrap())
295     }
296 
297     pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u32> {
298         self.ssa_reg.get(&ssa).cloned()
299     }
300 
301     pub fn try_get_ssa(&self, reg: u32) -> Option<SSAValue> {
302         if self.reg_is_used(reg) {
303             Some(self.reg_ssa[usize::try_from(reg).unwrap()])
304         } else {
305             None
306         }
307     }
308 
309     pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
310         let Some(reg) = self.try_get_reg(vec[0]) else {
311             return None;
312         };
313 
314         let align = u32::from(vec.comps()).next_power_of_two();
315         if reg % align != 0 {
316             return None;
317         }
318 
319         for c in 1..vec.comps() {
320             let ssa = vec[usize::from(c)];
321             if self.try_get_reg(ssa) != Some(reg + u32::from(c)) {
322                 return None;
323             }
324         }
325         Some(reg)
326     }
327 
328     pub fn free_ssa(&mut self, ssa: SSAValue) -> u32 {
329         assert!(ssa.file() == self.file);
330         let reg = self.ssa_reg.remove(&ssa).unwrap();
331         assert!(self.reg_is_used(reg));
332         let reg_usize = usize::try_from(reg).unwrap();
333         assert!(self.reg_ssa[reg_usize] == ssa);
334         self.used.remove(reg_usize);
335         self.pinned.remove(reg_usize);
336         reg
337     }
338 
339     pub fn assign_reg(&mut self, ssa: SSAValue, reg: u32) {
340         assert!(ssa.file() == self.file);
341         assert!(reg < self.num_regs);
342         assert!(!self.reg_is_used(reg));
343 
344         let reg_usize = usize::try_from(reg).unwrap();
345         if reg_usize >= self.reg_ssa.len() {
346             self.reg_ssa.resize(reg_usize + 1, SSAValue::NONE);
347         }
348         self.reg_ssa[reg_usize] = ssa;
349         let old = self.ssa_reg.insert(ssa, reg);
350         assert!(old.is_none());
351         self.used.insert(reg_usize);
352     }
353 
354     pub fn pin_reg(&mut self, reg: u32) {
355         assert!(self.reg_is_used(reg));
356         self.pinned.insert(reg.try_into().unwrap());
357     }
358 
359     fn reg_range_is_unset(set: &BitSet, reg: u32, comps: u8) -> bool {
360         for c in 0..u32::from(comps) {
361             if set.get((reg + c).try_into().unwrap()) {
362                 return false;
363             }
364         }
365         true
366     }
367 
368     fn try_find_unset_reg_range(
369         &self,
370         set: &BitSet,
371         start_reg: u32,
372         align: u32,
373         comps: u8,
374     ) -> Option<u32> {
375         assert!(comps > 0 && u32::from(comps) <= self.num_regs);
376 
377         let mut next_reg = start_reg;
378         loop {
379             let reg: u32 = set
380                 .next_unset(usize::try_from(next_reg).unwrap())
381                 .try_into()
382                 .unwrap();
383 
384             // Ensure we're properly aligned
385             let reg = reg.next_multiple_of(align);
386 
387             // Ensure we're in-bounds. This also serves as a check to ensure
388             // that u8::try_from(reg + i) will succeed.
389             if reg > self.num_regs - u32::from(comps) {
390                 return None;
391             }
392 
393             if Self::reg_range_is_unset(set, reg, comps) {
394                 return Some(reg);
395             }
396 
397             next_reg = reg + align;
398         }
399     }
400 
401     pub fn try_find_unused_reg_range(
402         &self,
403         start_reg: u32,
404         align: u32,
405         comps: u8,
406     ) -> Option<u32> {
407         self.try_find_unset_reg_range(&self.used, start_reg, align, comps)
408     }
409 
410     pub fn alloc_scalar(
411         &mut self,
412         ip: usize,
413         sum: &SSAUseMap,
414         phi_webs: &mut PhiWebs,
415         ssa: SSAValue,
416     ) -> u32 {
417         // Bias register assignment using the phi coalescing
418         if let Some(reg) = phi_webs.get(ssa) {
419             if !self.reg_is_used(reg) {
420                 self.assign_reg(ssa, reg);
421                 return reg;
422             }
423         }
424 
425         // Otherwise, use SSAUseMap heuristics
426         if let Some(u) = sum.find_vec_use_after(ssa, ip) {
427             match u {
428                 SSAUse::FixedReg(reg) => {
429                     if !self.reg_is_used(*reg) {
430                         self.assign_reg(ssa, *reg);
431                         return *reg;
432                     }
433                 }
434                 SSAUse::Vec(vec) => {
435                     let mut comp = u8::MAX;
436                     for c in 0..vec.comps() {
437                         if vec[usize::from(c)] == ssa {
438                             comp = c;
439                             break;
440                         }
441                     }
442                     assert!(comp < vec.comps());
443 
444                     let align = u32::from(vec.comps()).next_power_of_two();
445                     for c in 0..vec.comps() {
446                         if c == comp {
447                             continue;
448                         }
449 
450                         let other = vec[usize::from(c)];
451                         let Some(other_reg) = self.try_get_reg(other) else {
452                             continue;
453                         };
454 
455                         // Round other_reg down to a multiple of align
456                         let vec_reg = other_reg & !(align - 1);
456                         if other_reg != vec_reg + u32::from(c) {
457                             continue;
458                         }
459 
460                         let reg = vec_reg + u32::from(comp);
461                         if reg < self.num_regs && !self.reg_is_used(reg) {
462                             self.assign_reg(ssa, reg);
463                             return reg;
464                         }
465                     }
466 
467                     // We weren't able to pair it with an already allocated
468                     // register but maybe we can at least find an aligned one.
469                     if let Some(reg) =
470                         self.try_find_unused_reg_range(0, align, 1)
471                     {
472                         self.assign_reg(ssa, reg);
473                         return reg;
474                     }
475                 }
476             }
477         }
478 
479         let reg = self
480             .try_find_unused_reg_range(0, 1, 1)
481             .expect("Failed to find free register");
482         self.assign_reg(ssa, reg);
483         reg
484     }
485 }
486 
487 struct VecRegAllocator<'a> {
488     ra: &'a mut RegAllocator,
489     pcopy: OpParCopy,
490     pinned: BitSet,
491     evicted: HashMap<SSAValue, u32>,
492 }
493 
494 impl<'a> VecRegAllocator<'a> {
495     fn new(ra: &'a mut RegAllocator) -> Self {
496         let pinned = ra.pinned.clone();
497         VecRegAllocator {
498             ra,
499             pcopy: OpParCopy::new(),
500             pinned,
501             evicted: HashMap::new(),
502         }
503     }
504 
505     fn file(&self) -> RegFile {
506         self.ra.file()
507     }
508 
509     fn pin_reg(&mut self, reg: u32) {
510         self.pinned.insert(reg.try_into().unwrap());
511     }
512 
513     fn pin_reg_range(&mut self, reg: u32, comps: u8) {
514         for c in 0..u32::from(comps) {
515             self.pin_reg(reg + c);
516         }
517     }
518 
519     fn reg_is_pinned(&self, reg: u32) -> bool {
520         self.pinned.get(reg.try_into().unwrap())
521     }
522 
523     fn reg_range_is_unpinned(&self, reg: u32, comps: u8) -> bool {
524         RegAllocator::reg_range_is_unset(&self.pinned, reg, comps)
525     }
526 
527     fn assign_pin_reg(&mut self, ssa: SSAValue, reg: u32) -> RegRef {
528         self.pin_reg(reg);
529         self.ra.assign_reg(ssa, reg);
530         RegRef::new(self.file(), reg, 1)
531     }
532 
533     pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
534         for c in 0..vec.comps() {
535             let ssa = vec[usize::from(c)];
536             self.assign_pin_reg(ssa, reg + u32::from(c));
537         }
538         RegRef::new(self.file(), reg, vec.comps())
539     }
540 
541     fn try_find_unpinned_reg_range(
542         &self,
543         start_reg: u32,
544         align: u32,
545         comps: u8,
546     ) -> Option<u32> {
547         self.ra
548             .try_find_unset_reg_range(&self.pinned, start_reg, align, comps)
549     }
550 
551     pub fn evict_ssa(&mut self, ssa: SSAValue, old_reg: u32) {
552         assert!(ssa.file() == self.file());
553         assert!(!self.reg_is_pinned(old_reg));
554         self.evicted.insert(ssa, old_reg);
555     }
556 
557     pub fn evict_reg_if_used(&mut self, reg: u32) {
558         assert!(!self.reg_is_pinned(reg));
559 
560         if let Some(ssa) = self.ra.try_get_ssa(reg) {
561             self.ra.free_ssa(ssa);
562             self.evict_ssa(ssa, reg);
563         }
564     }
565 
566     fn move_ssa_to_reg(&mut self, ssa: SSAValue, new_reg: u32) {
567         if let Some(old_reg) = self.ra.try_get_reg(ssa) {
568             assert!(self.evicted.get(&ssa).is_none());
569             assert!(!self.reg_is_pinned(old_reg));
570 
571             if new_reg == old_reg {
572                 self.pin_reg(new_reg);
573             } else {
574                 self.ra.free_ssa(ssa);
575                 self.evict_reg_if_used(new_reg);
576 
577                 self.pcopy.push(
578                     RegRef::new(self.file(), new_reg, 1).into(),
579                     RegRef::new(self.file(), old_reg, 1).into(),
580                 );
581 
582                 self.assign_pin_reg(ssa, new_reg);
583             }
584         } else if let Some(old_reg) = self.evicted.remove(&ssa) {
585             self.evict_reg_if_used(new_reg);
586 
587             self.pcopy.push(
588                 RegRef::new(self.file(), new_reg, 1).into(),
589                 RegRef::new(self.file(), old_reg, 1).into(),
590             );
591 
592             self.assign_pin_reg(ssa, new_reg);
593         } else {
594             panic!("Unknown SSA value");
595         }
596     }
597 
598     fn finish(mut self, pcopy: &mut OpParCopy) {
599         pcopy.dsts_srcs.append(&mut self.pcopy.dsts_srcs);
600 
601         if !self.evicted.is_empty() {
602             // Sort so we get determinism, even if the hash map order changes
603             // from one run to another or due to rust compiler updates.
604             let mut evicted: Vec<_> = self.evicted.drain().collect();
605             evicted.sort_by_key(|(_, reg)| *reg);
606 
607             for (ssa, old_reg) in evicted {
608                 let mut next_reg = 0;
609                 let new_reg = loop {
610                     let reg = self
611                         .ra
612                         .try_find_unused_reg_range(next_reg, 1, 1)
613                         .expect("Failed to find free register");
614                     if !self.reg_is_pinned(reg) {
615                         break reg;
616                     }
617                     next_reg = reg + 1;
618                 };
619 
620                 pcopy.push(
621                     RegRef::new(self.file(), new_reg, 1).into(),
622                     RegRef::new(self.file(), old_reg, 1).into(),
623                 );
624                 self.assign_pin_reg(ssa, new_reg);
625             }
626         }
627     }
628 
629     pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
630         self.ra.try_get_vec_reg(vec)
631     }
632 
633     pub fn collect_vector(&mut self, vec: &SSARef) -> RegRef {
634         if let Some(reg) = self.try_get_vec_reg(vec) {
635             self.pin_reg_range(reg, vec.comps());
636             return RegRef::new(self.file(), reg, vec.comps());
637         }
638 
639         let comps = vec.comps();
640         let align = u32::from(comps).next_power_of_two();
641 
642         let reg = self
643             .ra
644             .try_find_unused_reg_range(0, align, comps)
645             .or_else(|| {
646                 for c in 0..comps {
647                     let ssa = vec[usize::from(c)];
648                     let Some(comp_reg) = self.ra.try_get_reg(ssa) else {
649                         continue;
650                     };
651 
652                     let vec_reg = comp_reg & !(align - 1);
653                     if comp_reg != vec_reg + u32::from(c) {
654                         continue;
655                     }
656 
657                     if vec_reg + u32::from(comps) > self.ra.num_regs {
658                         continue;
659                     }
660 
661                     if self.reg_range_is_unpinned(vec_reg, comps) {
662                         return Some(vec_reg);
663                     }
664                 }
665                 None
666             })
667             .or_else(|| self.try_find_unpinned_reg_range(0, align, comps))
668             .expect("Failed to find an unpinned register range");
669 
670         for c in 0..comps {
671             let ssa = vec[usize::from(c)];
672             self.move_ssa_to_reg(ssa, reg + u32::from(c));
673         }
674 
675         RegRef::new(self.file(), reg, comps)
676     }
677 
678     pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
679         let comps = vec.comps();
680         let align = u32::from(comps).next_power_of_two();
681 
682         if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
683             return self.assign_pin_vec_reg(vec, reg);
684         }
685 
686         let reg = self
687             .try_find_unpinned_reg_range(0, align, comps)
688             .expect("Failed to find an unpinned register range");
689 
690         for c in 0..comps {
691             self.evict_reg_if_used(reg + u32::from(c));
692         }
693         self.assign_pin_vec_reg(vec, reg)
694     }
695 
696     pub fn free_killed(&mut self, killed: &KillSet) {
697         for ssa in killed.iter() {
698             if ssa.file() == self.file() {
699                 self.ra.free_ssa(*ssa);
700             }
701         }
702     }
703 }
704 
705 impl Drop for VecRegAllocator<'_> {
706     fn drop(&mut self) {
707         assert!(self.evicted.is_empty());
708     }
709 }
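// Note on the two vector paths above: collect_vector() gathers components that
// already have registers into one contiguous, aligned range (moving or evicting
// values through the pending parallel copy as needed), while alloc_vector()
// claims a fresh range for a brand-new vector destination. The helpers below
// use collect_vector() for sources and alloc_vector()/assign_pin_vec_reg() for
// destinations.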
710 
711 fn instr_remap_srcs_file(instr: &mut Instr, ra: &mut VecRegAllocator) {
712     // Collect vector sources first since those may silently pin some of our
713     // scalar sources.
714     for src in instr.srcs_mut() {
715         if let Some(ssa) = src_ssa_ref(src) {
716             if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
717                 let reg = ra.collect_vector(ssa);
718                 src_set_reg(src, reg);
719             }
720         }
721     }
722 
723     if let PredRef::SSA(pred) = instr.pred.pred_ref {
724         if pred.file() == ra.file() {
725             instr.pred.pred_ref = ra.collect_vector(&pred.into()).into();
726         }
727     }
728 
729     for src in instr.srcs_mut() {
730         if let Some(ssa) = src_ssa_ref(src) {
731             if ssa.file().unwrap() == ra.file() && ssa.comps() == 1 {
732                 let reg = ra.collect_vector(ssa);
733                 src_set_reg(src, reg);
734             }
735         }
736     }
737 }
738 
739 fn instr_alloc_scalar_dsts_file(
740     instr: &mut Instr,
741     ip: usize,
742     sum: &SSAUseMap,
743     phi_webs: &mut PhiWebs,
744     ra: &mut RegAllocator,
745 ) {
746     for dst in instr.dsts_mut() {
747         if let Dst::SSA(ssa) = dst {
748             if ssa.file().unwrap() == ra.file() {
749                 assert!(ssa.comps() == 1);
750                 let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa[0]);
751                 *dst = RegRef::new(ra.file(), reg, 1).into();
752             }
753         }
754     }
755 }
756 
757 fn instr_assign_regs_file(
758     instr: &mut Instr,
759     ip: usize,
760     sum: &SSAUseMap,
761     phi_webs: &mut PhiWebs,
762     killed: &KillSet,
763     pcopy: &mut OpParCopy,
764     ra: &mut RegAllocator,
765 ) {
766     struct VecDst {
767         dst_idx: usize,
768         comps: u8,
769         killed: Option<SSARef>,
770         reg: u32,
771     }
772 
773     let mut vec_dsts = Vec::new();
774     let mut vec_dst_comps = 0;
775     for (i, dst) in instr.dsts().iter().enumerate() {
776         if let Dst::SSA(ssa) = dst {
777             if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
778                 vec_dsts.push(VecDst {
779                     dst_idx: i,
780                     comps: ssa.comps(),
781                     killed: None,
782                     reg: u32::MAX,
783                 });
784                 vec_dst_comps += ssa.comps();
785             }
786         }
787     }
788 
789     // No vector destinations is the easy case
790     if vec_dst_comps == 0 {
791         let mut vra = VecRegAllocator::new(ra);
792         instr_remap_srcs_file(instr, &mut vra);
793         vra.free_killed(killed);
794         vra.finish(pcopy);
795         instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
796         return;
797     }
798 
799     // Predicates can't be vectors.  This lets us ignore instr.pred in our
800     // analysis for the cases below. Only the easy case above needs to care
801     // about them.
802     assert!(!ra.file().is_predicate());
803 
804     let mut avail = killed.set.clone();
805     let mut killed_vecs = Vec::new();
806     for src in instr.srcs() {
807         if let Some(vec) = src_ssa_ref(src) {
808             if vec.comps() > 1 {
809                 let mut vec_killed = true;
810                 for ssa in vec.iter() {
811                     if ssa.file() != ra.file() || !avail.contains(ssa) {
812                         vec_killed = false;
813                         break;
814                     }
815                 }
816                 if vec_killed {
817                     for ssa in vec.iter() {
818                         avail.remove(ssa);
819                     }
820                     killed_vecs.push(*vec);
821                 }
822             }
823         }
824     }
825 
826     vec_dsts.sort_by_key(|v| v.comps);
827     killed_vecs.sort_by_key(|v| v.comps());
828 
829     let mut next_dst_reg = 0;
830     let mut vec_dsts_map_to_killed_srcs = true;
831     let mut could_trivially_allocate = true;
832     for vec_dst in vec_dsts.iter_mut().rev() {
833         while let Some(src) = killed_vecs.pop() {
834             if src.comps() >= vec_dst.comps {
835                 vec_dst.killed = Some(src);
836                 break;
837             }
838         }
839         if vec_dst.killed.is_none() {
840             vec_dsts_map_to_killed_srcs = false;
841         }
842 
843         let align = u32::from(vec_dst.comps).next_power_of_two();
844         if let Some(reg) =
845             ra.try_find_unused_reg_range(next_dst_reg, align, vec_dst.comps)
846         {
847             vec_dst.reg = reg;
848             next_dst_reg = reg + u32::from(vec_dst.comps);
849         } else {
850             could_trivially_allocate = false;
851         }
852     }
853 
854     if vec_dsts_map_to_killed_srcs {
855         let mut vra = VecRegAllocator::new(ra);
856         instr_remap_srcs_file(instr, &mut vra);
857 
858         for vec_dst in &mut vec_dsts {
859             let src_vec = vec_dst.killed.as_ref().unwrap();
860             vec_dst.reg = vra.try_get_vec_reg(src_vec).unwrap();
861         }
862 
863         vra.free_killed(killed);
864 
865         for vec_dst in vec_dsts {
866             let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
867             *dst = vra
868                 .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
869                 .into();
870         }
871 
872         vra.finish(pcopy);
873 
874         instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
875     } else if could_trivially_allocate {
876         let mut vra = VecRegAllocator::new(ra);
877         for vec_dst in vec_dsts {
878             let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
879             *dst = vra
880                 .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
881                 .into();
882         }
883 
884         instr_remap_srcs_file(instr, &mut vra);
885         vra.free_killed(killed);
886         vra.finish(pcopy);
887         instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
888     } else {
889         let mut vra = VecRegAllocator::new(ra);
890         instr_remap_srcs_file(instr, &mut vra);
891 
892         // Allocate vector destinations first so we have the most freedom.
893         // Scalar destinations can fill in holes.
894         for dst in instr.dsts_mut() {
895             if let Dst::SSA(ssa) = dst {
896                 if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
897                     *dst = vra.alloc_vector(*ssa).into();
898                 }
899             }
900         }
901 
902         vra.free_killed(killed);
903         vra.finish(pcopy);
904 
905         instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
906     }
907 }
908 
909 impl PerRegFile<RegAllocator> {
910     pub fn assign_reg(&mut self, ssa: SSAValue, reg: RegRef) {
911         assert!(reg.file() == ssa.file());
912         assert!(reg.comps() == 1);
913         self[ssa.file()].assign_reg(ssa, reg.base_idx());
914     }
915 
916     pub fn free_killed(&mut self, killed: &KillSet) {
917         for ssa in killed.iter() {
918             self[ssa.file()].free_ssa(*ssa);
919         }
920     }
921 }
922 
923 struct AssignRegsBlock {
924     ra: PerRegFile<RegAllocator>,
925     pcopy_tmp_gprs: u8,
926     live_in: Vec<LiveValue>,
927     phi_out: HashMap<u32, SrcRef>,
928 }
929 
930 impl AssignRegsBlock {
931     fn new(num_regs: &PerRegFile<u32>, pcopy_tmp_gprs: u8) -> AssignRegsBlock {
932         AssignRegsBlock {
933             ra: PerRegFile::new_with(|file| {
934                 RegAllocator::new(file, num_regs[file])
935             }),
936             pcopy_tmp_gprs: pcopy_tmp_gprs,
937             live_in: Vec::new(),
938             phi_out: HashMap::new(),
939         }
940     }
941 
942     fn get_scalar(&self, ssa: SSAValue) -> RegRef {
943         let ra = &self.ra[ssa.file()];
944         let reg = ra.try_get_reg(ssa).expect("Unknown SSA value");
945         RegRef::new(ssa.file(), reg, 1)
946     }
947 
948     fn alloc_scalar(
949         &mut self,
950         ip: usize,
951         sum: &SSAUseMap,
952         phi_webs: &mut PhiWebs,
953         ssa: SSAValue,
954     ) -> RegRef {
955         let ra = &mut self.ra[ssa.file()];
956         let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa);
957         RegRef::new(ssa.file(), reg, 1)
958     }
959 
960     fn pin_vector(&mut self, reg: RegRef) {
961         let ra = &mut self.ra[reg.file()];
962         for c in 0..reg.comps() {
963             ra.pin_reg(reg.comp(c).base_idx());
964         }
965     }
966 
967     fn try_coalesce(&mut self, ssa: SSAValue, src: &Src) -> bool {
968         debug_assert!(src.src_mod.is_none());
969         let SrcRef::Reg(src_reg) = src.src_ref else {
970             return false;
971         };
972         debug_assert!(src_reg.comps() == 1);
973 
974         if src_reg.file() != ssa.file() {
975             return false;
976         }
977 
978         let ra = &mut self.ra[src_reg.file()];
979         if ra.reg_is_used(src_reg.base_idx()) {
980             return false;
981         }
982 
983         ra.assign_reg(ssa, src_reg.base_idx());
984         true
985     }
986 
987     fn pcopy_tmp(&self) -> Option<RegRef> {
988         if self.pcopy_tmp_gprs > 0 {
989             Some(RegRef::new(
990                 RegFile::GPR,
991                 self.ra[RegFile::GPR].num_regs, // tmps sit just past the allocatable GPRs
992                 self.pcopy_tmp_gprs,
993             ))
994         } else {
995             None
996         }
997     }
998 
999     fn assign_regs_instr(
1000         &mut self,
1001         mut instr: Box<Instr>,
1002         ip: usize,
1003         sum: &SSAUseMap,
1004         phi_webs: &mut PhiWebs,
1005         srcs_killed: &KillSet,
1006         dsts_killed: &KillSet,
1007         pcopy: &mut OpParCopy,
1008     ) -> Option<Box<Instr>> {
1009         match &mut instr.op {
1010             Op::Undef(undef) => {
1011                 if let Dst::SSA(ssa) = undef.dst {
1012                     assert!(ssa.comps() == 1);
1013                     self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
1014                 }
1015                 assert!(srcs_killed.is_empty());
1016                 self.ra.free_killed(dsts_killed);
1017                 None
1018             }
1019             Op::PhiSrcs(phi) => {
1020                 for (id, src) in phi.srcs.iter() {
1021                     assert!(src.src_mod.is_none());
1022                     if let Some(ssa) = src_ssa_ref(src) {
1023                         assert!(ssa.comps() == 1);
1024                         let reg = self.get_scalar(ssa[0]);
1025                         self.phi_out.insert(*id, reg.into());
1026                     } else {
1027                         self.phi_out.insert(*id, src.src_ref);
1028                     }
1029                 }
1030                 assert!(dsts_killed.is_empty());
1031                 None
1032             }
1033             Op::PhiDsts(phi) => {
1034                 assert!(instr.pred.is_true());
1035 
1036                 for (id, dst) in phi.dsts.iter() {
1037                     if let Dst::SSA(ssa) = dst {
1038                         assert!(ssa.comps() == 1);
1039                         let reg = self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
1040                         self.live_in.push(LiveValue {
1041                             live_ref: LiveRef::Phi(*id),
1042                             reg_ref: reg,
1043                         });
1044                     }
1045                 }
1046                 assert!(srcs_killed.is_empty());
1047                 self.ra.free_killed(dsts_killed);
1048 
1049                 None
1050             }
1051             Op::Break(op) => {
1052                 for src in op.srcs_as_mut_slice() {
1053                     if let Some(ssa) = src_ssa_ref(src) {
1054                         assert!(ssa.comps() == 1);
1055                         let reg = self.get_scalar(ssa[0]);
1056                         src_set_reg(src, reg);
1057                     }
1058                 }
1059 
1060                 self.ra.free_killed(srcs_killed);
1061 
1062                 if let Dst::SSA(ssa) = &op.bar_out {
1063                     let reg = *op.bar_in.src_ref.as_reg().unwrap();
1064                     self.ra.assign_reg(ssa[0], reg);
1065                     op.bar_out = reg.into();
1066                 }
1067 
1068                 self.ra.free_killed(dsts_killed);
1069 
1070                 Some(instr)
1071             }
1072             Op::BSSy(op) => {
1073                 for src in op.srcs_as_mut_slice() {
1074                     if let Some(ssa) = src_ssa_ref(src) {
1075                         assert!(ssa.comps() == 1);
1076                         let reg = self.get_scalar(ssa[0]);
1077                         src_set_reg(src, reg);
1078                     }
1079                 }
1080 
1081                 self.ra.free_killed(srcs_killed);
1082 
1083                 if let Dst::SSA(ssa) = &op.bar_out {
1084                     let reg = *op.bar_in.src_ref.as_reg().unwrap();
1085                     self.ra.assign_reg(ssa[0], reg);
1086                     op.bar_out = reg.into();
1087                 }
1088 
1089                 self.ra.free_killed(dsts_killed);
1090 
1091                 Some(instr)
1092             }
1093             Op::Copy(copy) => {
1094                 if let Some(ssa) = src_ssa_ref(&copy.src) {
1095                     // This may be a CBuf::BindlessSSA source, so we need to
1096                     // support vectors because cbuf handles are vec2s. However,
1097                     // since we only have a single scalar destination, we can
1098                     // just allocate and free killed up-front.
1099                     let ra = &mut self.ra[ssa.file().unwrap()];
1100                     let mut vra = VecRegAllocator::new(ra);
1101                     let reg = vra.collect_vector(ssa);
1102                     vra.free_killed(srcs_killed);
1103                     vra.finish(pcopy);
1104                     src_set_reg(&mut copy.src, reg);
1105                 }
1106 
1107                 let mut del_copy = false;
1108                 if let Dst::SSA(dst_vec) = &mut copy.dst {
1109                     debug_assert!(dst_vec.comps() == 1);
1110                     let dst_ssa = &dst_vec[0];
1111 
1112                     if self.try_coalesce(*dst_ssa, &copy.src) {
1113                         del_copy = true;
1114                     } else {
1115                         copy.dst = self
1116                             .alloc_scalar(ip, sum, phi_webs, *dst_ssa)
1117                             .into();
1118                     }
1119                 }
1120 
1121                 self.ra.free_killed(dsts_killed);
1122 
1123                 if del_copy {
1124                     None
1125                 } else {
1126                     Some(instr)
1127                 }
1128             }
1129             Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
1130                 assert!(instr.pred.is_true());
1131 
1132                 // These basically act as a vector version of OpCopy except that
1133                 // they only work on SSA values and we pin the destination if
1134                 // it's OpPin.
1135                 let src_vec = src.as_ssa().unwrap();
1136                 let dst_vec = dst.as_ssa().unwrap();
1137                 assert!(src_vec.comps() == dst_vec.comps());
1138 
1139                 if srcs_killed.len() == src_vec.comps().into()
1140                     && src_vec.file() == dst_vec.file()
1141                 {
1142                     let ra = &mut self.ra[src_vec.file().unwrap()];
1143                     let mut vra = VecRegAllocator::new(ra);
1144                     let reg = vra.collect_vector(src_vec);
1145                     vra.finish(pcopy);
1146                     for c in 0..src_vec.comps() {
1147                         let c_reg = ra.free_ssa(src_vec[usize::from(c)]);
1148                         debug_assert!(c_reg == reg.comp(c).base_idx());
1149                         ra.assign_reg(dst_vec[usize::from(c)], c_reg);
1150                     }
1151 
1152                     if matches!(&instr.op, Op::Pin(_)) {
1153                         self.pin_vector(reg);
1154                     }
1155                     self.ra.free_killed(dsts_killed);
1156 
1157                     None
1158                 } else {
1159                     // Otherwise, turn into a parallel copy
1160                     //
1161                     // We can always allocate the destination first in this
1162                     // case.
1163                     assert!(dst_vec.comps() > 1 || srcs_killed.is_empty());
1164 
1165                     let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
1166                     let mut vra = VecRegAllocator::new(dst_ra);
1167                     let dst_reg = vra.alloc_vector(*dst_vec);
1168                     vra.finish(pcopy);
1169 
1170                     let mut pin_copy = OpParCopy::new();
1171                     for c in 0..dst_reg.comps() {
1172                         let src_reg = self.get_scalar(src_vec[usize::from(c)]);
1173                         pin_copy.push(dst_reg.comp(c).into(), src_reg.into());
1174                     }
1175 
1176                     if matches!(&instr.op, Op::Pin(_)) {
1177                         self.pin_vector(dst_reg);
1178                     }
1179                     self.ra.free_killed(srcs_killed);
1180                     self.ra.free_killed(dsts_killed);
1181 
1182                     Some(Instr::new_boxed(pin_copy))
1183                 }
1184             }
1185             Op::ParCopy(pcopy) => {
1186                 for (_, src) in pcopy.dsts_srcs.iter_mut() {
1187                     if let Some(src_vec) = src_ssa_ref(src) {
1188                         debug_assert!(src_vec.comps() == 1);
1189                         let reg = self.get_scalar(src_vec[0]).into();
1190                         src_set_reg(src, reg);
1191                     }
1192                 }
1193 
1194                 self.ra.free_killed(srcs_killed);
1195 
1196                 // Try to coalesce destinations into sources, if possible
1197                 pcopy.dsts_srcs.retain(|dst, src| match dst {
1198                     Dst::None => false,
1199                     Dst::SSA(dst_vec) => {
1200                         debug_assert!(dst_vec.comps() == 1);
1201                         !self.try_coalesce(dst_vec[0], src)
1202                     }
1203                     Dst::Reg(_) => true,
1204                 });
1205 
1206                 for (dst, _) in pcopy.dsts_srcs.iter_mut() {
1207                     if let Dst::SSA(dst_vec) = dst {
1208                         debug_assert!(dst_vec.comps() == 1);
1209                         *dst = self
1210                             .alloc_scalar(ip, sum, phi_webs, dst_vec[0])
1211                             .into();
1212                     }
1213                 }
1214 
1215                 self.ra.free_killed(dsts_killed);
1216 
1217                 pcopy.tmp = self.pcopy_tmp();
1218                 if pcopy.is_empty() {
1219                     None
1220                 } else {
1221                     Some(instr)
1222                 }
1223             }
1224             Op::RegOut(out) => {
1225                 for src in out.srcs.iter_mut() {
1226                     if let Some(src_vec) = src_ssa_ref(src) {
1227                         debug_assert!(src_vec.comps() == 1);
1228                         let reg = self.get_scalar(src_vec[0]).into();
1229                         src_set_reg(src, reg);
1230                     }
1231                 }
1232 
1233                 self.ra.free_killed(srcs_killed);
1234                 assert!(dsts_killed.is_empty());
1235 
1236                 // This should be the last instruction and its sources should
1237                 // be the last free GPRs.
1238                 debug_assert!(self.ra[RegFile::GPR].num_regs_used() == 0);
1239 
1240                 for (i, src) in out.srcs.iter().enumerate() {
1241                     let reg = u32::try_from(i).unwrap();
1242                     let dst = RegRef::new(RegFile::GPR, reg, 1);
1243                     pcopy.push(dst.into(), *src);
1244                 }
1245 
1246                 None
1247             }
1248             _ => {
1249                 for file in self.ra.values_mut() {
1250                     instr_assign_regs_file(
1251                         &mut instr,
1252                         ip,
1253                         sum,
1254                         phi_webs,
1255                         srcs_killed,
1256                         pcopy,
1257                         file,
1258                     );
1259                 }
1260                 self.ra.free_killed(dsts_killed);
1261                 Some(instr)
1262             }
1263         }
1264     }
1265 
1266     fn first_pass<BL: BlockLiveness>(
1267         &mut self,
1268         b: &mut BasicBlock,
1269         bl: &BL,
1270         pred_ra: Option<&PerRegFile<RegAllocator>>,
1271         phi_webs: &mut PhiWebs,
1272     ) {
1273         // Populate live in from the register file we're handed.  We'll add more
1274         // live in when we process the OpPhiDst, if any.
1275         if let Some(pred_ra) = pred_ra {
1276             for (raf, pred_raf) in self.ra.values_mut().zip(pred_ra.values()) {
1277                 for (ssa, reg) in &pred_raf.ssa_reg {
1278                     if bl.is_live_in(ssa) {
1279                         raf.assign_reg(*ssa, *reg);
1280                         if pred_raf.reg_is_pinned(*reg) {
1281                             raf.pin_reg(*reg);
1282                         }
1283                         self.live_in.push(LiveValue {
1284                             live_ref: LiveRef::SSA(*ssa),
1285                             reg_ref: RegRef::new(raf.file(), *reg, 1),
1286                         });
1287                     }
1288                 }
1289             }
1290         }
1291 
1292         let sum = SSAUseMap::for_block(b);
1293 
1294         let mut instrs = Vec::new();
1295         let mut srcs_killed = KillSet::new();
1296         let mut dsts_killed = KillSet::new();
1297 
1298         for (ip, instr) in b.instrs.drain(..).enumerate() {
1299             // Build up the kill set
1300             srcs_killed.clear();
1301             if let PredRef::SSA(ssa) = &instr.pred.pred_ref {
1302                 if !bl.is_live_after_ip(ssa, ip) {
1303                     srcs_killed.insert(*ssa);
1304                 }
1305             }
1306             for src in instr.srcs() {
1307                 for ssa in src.iter_ssa() {
1308                     if !bl.is_live_after_ip(ssa, ip) {
1309                         srcs_killed.insert(*ssa);
1310                     }
1311                 }
1312             }
1313 
1314             dsts_killed.clear();
1315             for dst in instr.dsts() {
1316                 if let Dst::SSA(vec) = dst {
1317                     for ssa in vec.iter() {
1318                         if !bl.is_live_after_ip(ssa, ip) {
1319                             dsts_killed.insert(*ssa);
1320                         }
1321                     }
1322                 }
1323             }
1324 
1325             let mut pcopy = OpParCopy::new();
1326             pcopy.tmp = self.pcopy_tmp();
1327 
1328             let instr = self.assign_regs_instr(
1329                 instr,
1330                 ip,
1331                 &sum,
1332                 phi_webs,
1333                 &srcs_killed,
1334                 &dsts_killed,
1335                 &mut pcopy,
1336             );
1337 
1338             if !pcopy.is_empty() {
1339                 if DEBUG.annotate() {
1340                     instrs.push(Instr::new_boxed(OpAnnotate {
1341                         annotation: "generated by assign_regs".into(),
1342                     }));
1343                 }
1344                 if !b.uniform {
1345                     for dst in pcopy.dsts_as_slice() {
1346                         if let Dst::Reg(reg) = dst {
1347                             assert!(!reg.is_uniform());
1348                         }
1349                     }
1350                 }
1351                 instrs.push(Instr::new_boxed(pcopy));
1352             }
1353 
1354             if let Some(instr) = instr {
1355                 instrs.push(instr);
1356             }
1357         }
1358 
1359         // Update phi_webs with the registers assigned in this block
1360         for ra in self.ra.values() {
1361             for (ssa, reg) in &ra.ssa_reg {
1362                 phi_webs.set(*ssa, *reg);
1363             }
1364         }
1365 
1366         // Sort live-in to maintain determinism
1367         self.live_in.sort();
1368 
1369         b.instrs = instrs;
1370     }
1371 
1372     fn second_pass(&self, target: &AssignRegsBlock, b: &mut BasicBlock) {
1373         let mut pcopy = OpParCopy::new();
1374         pcopy.tmp = self.pcopy_tmp();
1375 
1376         for lv in &target.live_in {
1377             let src = match lv.live_ref {
1378                 LiveRef::SSA(ssa) => SrcRef::from(self.get_scalar(ssa)),
1379                 LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
1380             };
1381             let dst = lv.reg_ref;
1382             if let SrcRef::Reg(src_reg) = src {
1383                 if dst == src_reg {
1384                     continue;
1385                 }
1386             }
1387             pcopy.push(dst.into(), src.into());
1388         }
1389 
1390         if DEBUG.annotate() {
1391             b.instrs.push(Instr::new_boxed(OpAnnotate {
1392                 annotation: "generated by assign_regs".into(),
1393             }));
1394         }
1395         if b.branch().is_some() {
1396             b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(pcopy));
1397         } else {
1398             b.instrs.push(Instr::new_boxed(pcopy));
1399         }
1400     }
1401 }
1402 
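// Overall flow, as a rough sketch: assign_regs() converts the function to CSSA,
// spills any register file whose demand exceeds its limit, then allocates block
// by block. first_pass() walks a block's instructions in order, computing kill
// sets from liveness and assigning registers per file; second_pass() then
// patches CFG edges with parallel copies so each successor's live-in values
// land in the registers it recorded in live_in/phi_out.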
1403 impl Shader<'_> {
1404     pub fn assign_regs(&mut self) {
1405         assert!(self.functions.len() == 1);
1406         let f = &mut self.functions[0];
1407 
1408         // Convert to CSSA before we spill or assign registers
1409         f.to_cssa();
1410 
1411         let mut live = SimpleLiveness::for_function(f);
1412         let mut max_live = live.calc_max_live(f);
1413 
1414         // We want at least one temporary GPR reserved for parallel copies.
1415         let mut tmp_gprs = 1_u8;
1416 
1417         let spill_files =
1418             [RegFile::UPred, RegFile::Pred, RegFile::UGPR, RegFile::Bar];
1419         for file in spill_files {
1420             let num_regs = self.sm.num_regs(file);
1421             if max_live[file] > num_regs {
1422                 f.spill_values(file, num_regs);
1423 
1424                 // Re-calculate liveness after we spill
1425                 live = SimpleLiveness::for_function(f);
1426                 max_live = live.calc_max_live(f);
1427 
1428                 if file == RegFile::Bar {
1429                     tmp_gprs = max(tmp_gprs, 2);
1430                 }
1431             }
1432         }
1433 
1434         // An instruction can have at most 4 vector sources/destinations.  In
1435         // order to ensure we always succeed at allocation, regardless of
1436         // arbitrary choices, we need at least 16 GPRs.
1437         let mut gpr_limit = max(max_live[RegFile::GPR], 16);
1438         let mut total_gprs = gpr_limit + u32::from(tmp_gprs);
1439 
1440         let mut max_gprs = if DEBUG.spill() {
1441             // We need at least 16 registers to satisfy RA constraints for
1442             // texture ops and another 2 for parallel copy lowering
1443             18
1444         } else {
1445             self.sm.num_regs(RegFile::GPR)
1446         };
1447 
1448         if let ShaderStageInfo::Compute(cs_info) = &self.info.stage {
1449             max_gprs = min(
1450                 max_gprs,
1451                 gpr_limit_from_local_size(&cs_info.local_size)
1452                     - self.sm.hw_reserved_gprs(),
1453             );
1454         }
1455 
1456         if total_gprs > max_gprs {
1457             // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
1458             // lowering because it needs to be able to lower Mem copies, which
1459             // require a temporary
1460             tmp_gprs = max(tmp_gprs, 2);
1461             total_gprs = max_gprs;
1462             gpr_limit = total_gprs - u32::from(tmp_gprs);
1463 
1464             f.spill_values(RegFile::GPR, gpr_limit);
1465 
1466             // Re-calculate liveness one last time
1467             live = SimpleLiveness::for_function(f);
1468         }
1469 
1470         self.info.num_gprs = total_gprs.try_into().unwrap();
1471 
1472         let limit = PerRegFile::new_with(|file| {
1473             if file == RegFile::GPR {
1474                 gpr_limit
1475             } else {
1476                 self.sm.num_regs(file)
1477             }
1478         });
1479 
1480         let mut phi_webs = PhiWebs::new(f);
1481 
1482         let mut blocks: Vec<AssignRegsBlock> = Vec::new();
1483         for b_idx in 0..f.blocks.len() {
1484             let pred = f.blocks.pred_indices(b_idx);
1485             let pred_ra = if pred.is_empty() {
1486                 None
1487             } else {
1488                 // Start with the previous block's.
1489                 Some(&blocks[pred[0]].ra)
1490             };
1491 
1492             let bl = live.block_live(b_idx);
1493 
1494             let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
1495             arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra, &mut phi_webs);
1496 
1497             assert!(blocks.len() == b_idx);
1498             blocks.push(arb);
1499         }
1500 
1501         for b_idx in 0..f.blocks.len() {
1502             let arb = &blocks[b_idx];
1503             for sb_idx in f.blocks.succ_indices(b_idx).to_vec() {
1504                 arb.second_pass(&blocks[sb_idx], &mut f.blocks[b_idx]);
1505             }
1506         }
1507     }
1508 }
1509